├── .gitignore
├── .rubocop.yml
├── .rubocop_todo.yml
├── Gemfile
├── Gemfile.lock
├── LICENSE
├── README.md
├── Rakefile
├── danbooru.rb
├── features
    ├── download_images_by_pool.feature
    ├── download_images_by_special.feature
    ├── download_images_by_tag.feature
    ├── images
    │   └── .gitkeep
    ├── step_definitions
    │   ├── download_images_by_pool_steps.rb
    │   ├── download_images_by_special_steps.steps.rb
    │   ├── download_images_by_tag_steps.rb
    │   └── download_images_steps.rb
    └── support
    │   └── env.rb
└── lib
    ├── behoimi.rb
    ├── booru.rb
    ├── booru
        ├── base.rb
        ├── pool.rb
        └── posts.rb
    ├── danbooru.rb
    ├── konachan.rb
    ├── testbooru.rb
    └── yandere.rb


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | images
3 | .bundle
4 | bundle
5 | vendor/bundle
6 | features/images
7 | !features/images/.gitkeep
8 | 


--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
 1 | require:
 2 |   - rubocop-performance
 3 |   - rubocop-rake
 4 |   - rubocop-rspec
 5 | 
 6 | inherit_from: .rubocop_todo.yml
 7 | 
 8 | AllCops:
 9 |   Exclude:
10 |     - 'bundle/**/*'
11 |     - 'vendor/**/*'
12 |     - 'images/**/*'
13 |   TargetRubyVersion: 2.7
14 |   NewCops: enable
15 | 
16 | Style/FrozenStringLiteralComment:
17 |   Enabled: false
18 | 


--------------------------------------------------------------------------------
/.rubocop_todo.yml:
--------------------------------------------------------------------------------
 1 | Layout/LineLength:
 2 |   Max: 120
 3 | 
 4 | Metrics/AbcSize:
 5 |   Max: 41
 6 | 
 7 | Metrics/ClassLength:
 8 |   Max: 128
 9 | 
10 | Metrics/CyclomaticComplexity:
11 |   Max: 10
12 | 
13 | Metrics/MethodLength:
14 |   Max: 34
15 | 
16 | Metrics/ParameterLists:
17 |   Max: 7
18 |   MaxOptionalParameters: 6
19 | 
20 | Metrics/PerceivedComplexity:
21 |   Max: 13
22 | 
23 | Style/Documentation:
24 |   Enabled: false
25 | 
26 | Style/MutableConstant:
27 |   Exclude:
28 |     - 'lib/behoimi.rb'
29 |     - 'lib/booru/base.rb'
30 |     - 'lib/danbooru.rb'
31 |     - 'lib/testbooru.rb'
32 |     - 'lib/konachan.rb'
33 |     - 'lib/yandere.rb'
34 | 
35 | Style/OptionalBooleanParameter:
36 |   Exclude:
37 |     - 'lib/booru/base.rb'
38 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | source 'https://rubygems.org'
 2 | 
 3 | gem 'json'
 4 | gem 'nokogiri'
 5 | 
 6 | group :development, :test do
 7 |   # gem 'debug', platforms: %i[mri mingw x64_mingw]
 8 |   gem 'cucumber', require: false
 9 |   gem 'rake', require: false
10 |   gem 'rspec', require: false
11 |   gem 'rubocop', require: false
12 |   gem 'rubocop-performance', require: false
13 |   gem 'rubocop-rake', require: false
14 |   gem 'rubocop-rspec', require: false
15 |   gem 'ruby-filemagic', require: false
16 | end
17 | 


--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
  1 | GEM
  2 |   remote: https://rubygems.org/
  3 |   specs:
  4 |     ast (2.4.2)
  5 |     bigdecimal (3.1.8)
  6 |     builder (3.3.0)
  7 |     cucumber (9.2.0)
  8 |       builder (~> 3.2)
  9 |       cucumber-ci-environment (> 9, < 11)
 10 |       cucumber-core (> 13, < 14)
 11 |       cucumber-cucumber-expressions (~> 17.0)
 12 |       cucumber-gherkin (> 24, < 28)
 13 |       cucumber-html-formatter (> 20.3, < 22)
 14 |       cucumber-messages (> 19, < 25)
 15 |       diff-lcs (~> 1.5)
 16 |       mini_mime (~> 1.1)
 17 |       multi_test (~> 1.1)
 18 |       sys-uname (~> 1.2)
 19 |     cucumber-ci-environment (10.0.1)
 20 |     cucumber-core (13.0.3)
 21 |       cucumber-gherkin (>= 27, < 28)
 22 |       cucumber-messages (>= 20, < 23)
 23 |       cucumber-tag-expressions (> 5, < 7)
 24 |     cucumber-cucumber-expressions (17.1.0)
 25 |       bigdecimal
 26 |     cucumber-gherkin (27.0.0)
 27 |       cucumber-messages (>= 19.1.4, < 23)
 28 |     cucumber-html-formatter (21.7.0)
 29 |       cucumber-messages (> 19, < 27)
 30 |     cucumber-messages (22.0.0)
 31 |     cucumber-tag-expressions (6.1.0)
 32 |     diff-lcs (1.5.1)
 33 |     ffi (1.17.0)
 34 |     json (2.7.2)
 35 |     language_server-protocol (3.17.0.3)
 36 |     mini_mime (1.1.5)
 37 |     mini_portile2 (2.8.7)
 38 |     multi_test (1.1.0)
 39 |     nokogiri (1.16.7)
 40 |       mini_portile2 (~> 2.8.2)
 41 |       racc (~> 1.4)
 42 |     parallel (1.26.3)
 43 |     parser (3.3.5.0)
 44 |       ast (~> 2.4.1)
 45 |       racc
 46 |     racc (1.8.1)
 47 |     rainbow (3.1.1)
 48 |     rake (13.2.1)
 49 |     regexp_parser (2.9.2)
 50 |     rspec (3.13.0)
 51 |       rspec-core (~> 3.13.0)
 52 |       rspec-expectations (~> 3.13.0)
 53 |       rspec-mocks (~> 3.13.0)
 54 |     rspec-core (3.13.2)
 55 |       rspec-support (~> 3.13.0)
 56 |     rspec-expectations (3.13.3)
 57 |       diff-lcs (>= 1.2.0, < 2.0)
 58 |       rspec-support (~> 3.13.0)
 59 |     rspec-mocks (3.13.2)
 60 |       diff-lcs (>= 1.2.0, < 2.0)
 61 |       rspec-support (~> 3.13.0)
 62 |     rspec-support (3.13.1)
 63 |     rubocop (1.67.0)
 64 |       json (~> 2.3)
 65 |       language_server-protocol (>= 3.17.0)
 66 |       parallel (~> 1.10)
 67 |       parser (>= 3.3.0.2)
 68 |       rainbow (>= 2.2.2, < 4.0)
 69 |       regexp_parser (>= 2.4, < 3.0)
 70 |       rubocop-ast (>= 1.32.2, < 2.0)
 71 |       ruby-progressbar (~> 1.7)
 72 |       unicode-display_width (>= 2.4.0, < 3.0)
 73 |     rubocop-ast (1.32.3)
 74 |       parser (>= 3.3.1.0)
 75 |     rubocop-performance (1.22.1)
 76 |       rubocop (>= 1.48.1, < 2.0)
 77 |       rubocop-ast (>= 1.31.1, < 2.0)
 78 |     rubocop-rake (0.6.0)
 79 |       rubocop (~> 1.0)
 80 |     rubocop-rspec (3.1.0)
 81 |       rubocop (~> 1.61)
 82 |     ruby-filemagic (0.7.3)
 83 |     ruby-progressbar (1.13.0)
 84 |     sys-uname (1.3.0)
 85 |       ffi (~> 1.1)
 86 |     unicode-display_width (2.6.0)
 87 | 
 88 | PLATFORMS
 89 |   ruby
 90 | 
 91 | DEPENDENCIES
 92 |   cucumber
 93 |   json
 94 |   nokogiri
 95 |   rake
 96 |   rspec
 97 |   rubocop
 98 |   rubocop-performance
 99 |   rubocop-rake
100 |   rubocop-rspec
101 |   ruby-filemagic
102 | 
103 | BUNDLED WITH
104 |    2.5.22
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Ivan Larionov
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Overview
 2 | 
 3 | danbooru-ruby-grabber is a danbooru downloader — simple script which downloads images from danbooru.donmai.us, konachan.com, behoimi.org and yande.re. Support of any danbooru-powered site could be added easily.
 4 | 
 5 | ## Usage
 6 | 
 7 | ```
 8 | Usage: danbooru.rb [options] "tags"
 9 | 
10 | Target:
11 |     -b, --board BOARDNAME            Target board. Supported options: danbooru (default), konachan, behoimi, yandere
12 |     -P, --pool POOL_ID               Pool ID (tags will be ignored)
13 | 
14 | Storage options:
15 |     -s, --storage DIR                Storage mode (all images in one dir and symlinks in tagged dirs)
16 |     -d, --directory BASE_DIR         Base directory to save images. By default it uses the same location as script
17 |     -f, --filename PATTERN           Filename pattern. Supported options: id (default), md5, tags, url (old default)
18 |     Note: `-f tags` could miss some files due to filesystems' filename length limitation.
19 | 
20 | Authentication:
21 |     This is optional, but recommended since some boards block access without authentication.
22 |     -u, --user USERNAME              Username
23 |     -p, --password PASSWORD          Password (API Key for danbooru)
24 | 
25 | Tools:
26 |     Ruby's file saver is used by default. You can change it using this options. `wget` or `curl` binaries should be available.
27 |     -w, --wget                       Download using wget
28 |     -c, --curl                       Download using curl
29 | 
30 | Limits:
31 |     This option could be used multiple times with different limiters.
32 |     -l, --limit LIMITER              Limiters in the following format: limiter=number. Supported limiters: pages, posts, per_page
33 | 
34 | Help:
35 |     -h, --help                       Print a help message
36 | ```
37 | 
38 | ## Notes
39 | 
 40 | * To prevent duplicates, files are stored using post-id-based filenames. You can change this behavior using the `-f` option.
41 | 
42 | * Using `-f tags` could miss some files due to filesystems' filename length limitation.
43 | 
44 | * Always use `-u` and `-p` options with danbooru, because they block requests without login and password.
45 | 
 46 | * The number of tags you can use at the same time could be limited by the board (for example, danbooru limits basic accounts to 2 tags).
47 | 
48 | * Images are stored in tag-named directory.
49 | 
50 | * Script creates file named `files.bbs` with all tags of each image.
51 | 
52 | ## Installation
53 | 
 54 | You need the json and nokogiri gems to be installed. You can install them with this command:
55 | 
56 | `gem install json nokogiri`
57 | 
 58 | Note: you may need to use `sudo`.
59 | 
60 | ## Bonus
61 | 
62 | Have fun.
63 | 
64 | * Author: Xeron
65 | * E-mail: xeron.oskom@gmail.com
66 | * Homepage: https://xeron.me
67 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'fileutils'
 2 | require 'cucumber'
 3 | require 'cucumber/rake/task'
 4 | require 'rubocop/rake_task'
 5 | 
 6 | Cucumber::Rake::Task.new(:features) do |t|
 7 |   t.cucumber_opts = ['features', '--format', 'pretty']
 8 | end
 9 | 
10 | namespace :spec do
11 |   desc 'Clean up rbx compiled files and run cucumber tests'
12 |   Cucumber::Rake::Task.new(:ci) do
13 |     Dir.glob('**/*.rbc').each { |f| FileUtils.rm_f(f) }
14 |   end
15 | end
16 | 
17 | RuboCop::RakeTask.new
18 | 


--------------------------------------------------------------------------------
/danbooru.rb:
--------------------------------------------------------------------------------
  1 | # Author: Ivan "Xeron" Larionov
  2 | # E-mail: xeron.oskom@gmail.com
  3 | # Homepage: http://blog.xeron.me
  4 | # Version: 2.7
  5 | 
  6 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib')
  7 | require 'booru'
  8 | require 'danbooru'
  9 | require 'testbooru'
 10 | require 'konachan'
 11 | require 'behoimi'
 12 | require 'yandere'
 13 | 
 14 | options = {}
 15 | options[:limits] = {}
 16 | optparse = OptionParser.new do |opts| # rubocop:disable Metrics/BlockLength
 17 |   opts.banner = 'Usage: danbooru.rb [options] "tags"'
 18 | 
 19 |   opts.separator("\nTarget:")
 20 |   opts.on(
 21 |     '-b', '--board BOARDNAME',
 22 |     'Target board. Supported options: danbooru (default), konachan, behoimi, yandere'
 23 |   ) do |board|
 24 |     options[:board] = board.to_sym
 25 |   end
 26 |   opts.on(
 27 |     '-P', '--pool POOL_ID',
 28 |     'Pool ID (tags will be ignored)'
 29 |   ) do |pool|
 30 |     if /^[1-9][0-9]*/.match?(pool)
 31 |       options[:pool] = pool.to_i
 32 |     else
 33 |       warn "Wrong pool id: #{pool}. It should be a number greater than 0."
 34 |       exit 1
 35 |     end
 36 |   end
 37 | 
 38 |   opts.separator("\nStorage options:")
 39 |   opts.on(
 40 |     '-s', '--storage DIR',
 41 |     'Storage mode (all images in one dir and symlinks in tagged dirs)'
 42 |   ) do |dir|
 43 |     options[:storage] = dir
 44 |   end
 45 |   opts.on(
 46 |     '-d', '--directory BASE_DIR',
 47 |     'Base directory to save images. By default it uses the same location as script'
 48 |   ) do |base_path|
 49 |     options[:base_path] = base_path
 50 |   end
 51 |   opts.on(
 52 |     '-f', '--filename PATTERN',
 53 |     'Filename pattern. Supported options: id (default), md5, tags, url (old default)'
 54 |   ) do |filename|
 55 |     options[:filename] = filename.to_sym
 56 |   end
 57 |   opts.separator("\tNote: `-f tags` could miss some files due to filesystems' filename length limitation.")
 58 | 
 59 |   opts.separator("\nAuthentication:")
 60 |   opts.separator('    This is optional, but recommended since some boards block access without authentication.')
 61 |   opts.on('-u', '--user USERNAME', 'Username') do |user|
 62 |     options[:user] = user
 63 |   end
 64 |   opts.on('-p', '--password PASSWORD', 'Password (API Key for danbooru)') do |pass|
 65 |     options[:password] = pass
 66 |   end
 67 | 
 68 |   opts.separator("\nTools:")
 69 |   opts.separator(
 70 |     "    Ruby's file saver is used by default. You can change it using this options. " \
 71 |     '`wget` or `curl` binaries should be available.'
 72 |   )
 73 |   opts.on('-w', '--wget', 'Download using wget') do
 74 |     options[:downloader] = :wget
 75 |   end
 76 |   opts.on('-c', '--curl', 'Download using curl') do
 77 |     options[:downloader] = :curl
 78 |   end
 79 | 
 80 |   opts.separator("\nLimits:")
 81 |   opts.separator('    This option could be used multiple times with different limiters.')
 82 |   opts.on(
 83 |     '-l', '--limit LIMITER',
 84 |     'Limiters in the following format: limiter=number. Supported limiters: pages, posts, per_page'
 85 |   ) do |limiter|
 86 |     if limiter =~ /(pages|posts|per_page)=([1-9][0-9]*)/
 87 |       options[:limits][Regexp.last_match[1].to_sym] = Regexp.last_match[2].to_i
 88 |     else
 89 |       warn \
 90 |         "Wrong limiter: #{limiter}. It should be pages, posts or per_page and value should be a number greater than 0."
 91 |       exit 1
 92 |     end
 93 |   end
 94 | 
 95 |   opts.separator("\nHelp:")
 96 |   opts.on('-h', '--help', 'Print a help message') do
 97 |     puts opts
 98 |     exit
 99 |   end
100 | end
101 | 
102 | begin
103 |   optparse.parse!
104 | rescue StandardError => e
105 |   puts e
106 | end
107 | 
108 | if !options[:pool] && (ARGV.empty? || ARGV[0].empty?)
109 |   puts optparse.help
110 | else
111 |   board =
112 |     case options[:board]
113 |     when :konachan
114 |       Konachan.new(options)
115 |     when :behoimi
116 |       Behoimi.new(options)
117 |     when :yandere
118 |       Yandere.new(options)
119 |     when :testbooru
120 |       Testbooru.new(options)
121 |     else
122 |       Danbooru.new(options)
123 |     end
124 | 
125 |   if options[:pool]
126 |     board.download_by_pool(options[:pool])
127 |   else
128 |     board.download_by_tags(ARGV[0])
129 |   end
130 | end
131 | 


--------------------------------------------------------------------------------
/features/download_images_by_pool.feature:
--------------------------------------------------------------------------------
 1 | @pool
 2 | Feature: Download images by pool
 3 | 
 4 |   @danbooru
 5 |   Scenario: Download images from danbooru
 6 |     Given I want to download images from danbooru pool and save them using default pattern
 7 |     When I run script to download images using default saver
 8 |     Then I should see downloaded images by pool
 9 |     And I should see images in bbs file
10 | 
11 |   @konachan
12 |   Scenario: Download images from konachan
13 |     Given I want to download images from konachan pool and save them using id
14 |     When I run script to download images using wget
15 |     Then I should see downloaded images by pool
16 |     And I should see images in bbs file
17 | 
18 |   @behoimi
19 |   Scenario: Download images from behoimi
20 |     Given I want to download images from behoimi pool and save them using url
21 |     When I run script to download images using default saver
22 |     Then I should see downloaded images by pool
23 |     And I should see images in bbs file
24 | 
25 |   @yandere
26 |   Scenario: Download images from yandere
27 |     Given I want to download images from yandere pool and save them using url
28 |     When I run script to download images using curl
29 |     Then I should see downloaded images by pool
30 |     And I should see images in bbs file
31 | 


--------------------------------------------------------------------------------
/features/download_images_by_special.feature:
--------------------------------------------------------------------------------
 1 | @special_tag
 2 | Feature: Download images by special
 3 | 
 4 |   @danbooru
 5 |   Scenario: Download images from danbooru
 6 |     Given I want to download images from danbooru with special tag and save them using default pattern
 7 |     When I run script to download images using default saver
 8 |     Then I should see downloaded images by tag
 9 |     And I should see images in bbs file
10 | 
11 |   @konachan
12 |   Scenario: Download images from konachan
13 |     Given I want to download images from konachan with special tag and save them using id
14 |     When I run script to download images using wget
15 |     Then I should see downloaded images by tag
16 |     And I should see images in bbs file
17 | 
18 |   @behoimi
19 |   Scenario: Download images from behoimi
20 |     Given I want to download images from behoimi with special tag and save them using url
21 |     When I run script to download images using default saver
22 |     Then I should see downloaded images by tag
23 |     And I should see images in bbs file
24 | 
25 |   @yandere
26 |   Scenario: Download images from yandere
27 |     Given I want to download images from yandere with special tag and save them using url
28 |     When I run script to download images using curl
29 |     Then I should see downloaded images by tag
30 |     And I should see images in bbs file
31 | 


--------------------------------------------------------------------------------
/features/download_images_by_tag.feature:
--------------------------------------------------------------------------------
 1 | @tag
 2 | Feature: Download images by tag
 3 | 
 4 |   @danbooru
 5 |   Scenario: Download images from danbooru
 6 |     Given I want to download images from danbooru and save them using default pattern
 7 |     When I run script to download images using default saver
 8 |     Then I should see downloaded images by tag
 9 |     And I should see images and tags in bbs file
10 | 
11 |   @konachan
12 |   Scenario: Download images from konachan
13 |     Given I want to download images from konachan and save them using id
14 |     When I run script to download images using wget
15 |     Then I should see downloaded images by tag
16 |     And I should see images and tags in bbs file
17 | 
18 |   @behoimi
19 |   Scenario: Download images from behoimi
20 |     Given I want to download images from behoimi and save them using url
21 |     When I run script to download images using default saver
22 |     Then I should see downloaded images by tag
23 |     And I should see images and tags in bbs file
24 | 
25 |   @yandere
26 |   Scenario: Download images from yandere
27 |     Given I want to download images from yandere and save them using url
28 |     When I run script to download images using curl
29 |     Then I should see downloaded images by tag
30 |     And I should see images and tags in bbs file
31 | 


--------------------------------------------------------------------------------
/features/images/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xeron/danbooru-ruby-grabber/8c475bbeb1b9cee6ef8e7183e4c5d74c7b0d0225/features/images/.gitkeep


--------------------------------------------------------------------------------
/features/step_definitions/download_images_by_pool_steps.rb:
--------------------------------------------------------------------------------
1 | Given(/^I want to download images from (\S+) pool and save them using (.*)$/) do |board, pattern|
2 |   id = POOLS_MATRIX[board]['id']
3 |   @tags = POOLS_MATRIX[board]['name']
4 | 
5 |   filenames = pattern == 'default pattern' ? '' : "-f #{pattern}"
6 | 
7 |   @cmd = "#{BASE_CMD} -l posts=#{POSTS_COUNT} #{filenames} -b #{board} -P #{id}"
8 | end
9 | 


--------------------------------------------------------------------------------
/features/step_definitions/download_images_by_special_steps.steps.rb:
--------------------------------------------------------------------------------
1 | Given(/^I want to download images from (\S+) with special tag and save them using (.*)$/) do |board, pattern|
2 |   @tags = SPECIAL_MATRIX[board]
3 | 
4 |   filenames = pattern == 'default pattern' ? '' : "-f #{pattern}"
5 | 
6 |   @cmd = "#{BASE_CMD} -l posts=#{POSTS_COUNT} #{filenames} -b #{board} '#{@tags}'"
7 | end
8 | 


--------------------------------------------------------------------------------
/features/step_definitions/download_images_by_tag_steps.rb:
--------------------------------------------------------------------------------
 1 | Given(/^I want to download images from (\S+) and save them using (.*)$/) do |board, pattern|
 2 |   @tags = TAGS_MATRIX[board]
 3 |   limits = [
 4 |     "-l per_page=#{PER_PAGE} -l pages=#{PAGES}",
 5 |     "-l posts=#{POSTS_COUNT}"
 6 |   ]
 7 |   filenames = pattern == 'default pattern' ? '' : "-f #{pattern}"
 8 | 
 9 |   @cmd = "#{BASE_CMD} #{limits.sample} #{filenames} -b #{board} '#{@tags}'"
10 | end
11 | 
12 | Then(/^I should see images and tags in bbs file$/) do
13 |   bbs_file = File.join(@dir, 'files.bbs')
14 |   bbs = File.read(bbs_file)
15 | 
16 |   expect(bbs.split("\n").size).to eq @images_count
17 |   @tags.split.each do |tag|
18 |     @images.each do |image|
19 |       regex = Regexp.new("^#{Regexp.escape(image)}.*#{tag}.*\\n")
20 |       expect(bbs).to match regex
21 |     end
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/features/step_definitions/download_images_steps.rb:
--------------------------------------------------------------------------------
 1 | When(/^I run script to download images using (.*)$/) do |saver|
 2 |   @cmd += SAVER_MATRIX[saver]
 3 |   log(@cmd)
 4 | 
 5 |   output = `#{@cmd}`
 6 | 
 7 |   missed_count = output.split("\n").grep(/File url is unknown for .* fail/).size
 8 |   @images_count = POSTS_COUNT - missed_count
 9 | end
10 | 
11 | Then(/^I should see downloaded images by (.*)$/) do |source|
12 |   fm = FileMagic.new
13 |   @dir = File.join(BASE_DIR, sanitize_filename(@tags, pool: source == 'pool'))
14 |   files = list_files(@dir)
15 |   @images = files - ['files.bbs']
16 | 
17 |   expect(@images.size).to eq @images_count
18 |   @images.each do |image|
19 |     expect(fm.file(File.join(@dir, image))).to match(/image|Macromedia Flash|Zip archive data|WebM|MP4|MPEG/)
20 |   end
21 | end
22 | 
23 | Then(/^I should see images in bbs file$/) do
24 |   bbs_file = File.join(@dir, 'files.bbs')
25 |   bbs = File.read(bbs_file)
26 | 
27 |   expect(bbs.split("\n").size).to eq @images_count
28 |   @images.each do |image|
29 |     expect(bbs).to match Regexp.new("^#{Regexp.escape(image)}.*\\n")
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/features/support/env.rb:
--------------------------------------------------------------------------------
 1 | require 'fileutils'
 2 | require 'filemagic'
 3 | 
 4 | TAGS_MATRIX = {
 5 |   'danbooru' => 'touhou dress',
 6 |   'testbooru' => 'tagme highres',
 7 |   'konachan' => 'robotics;notes',
 8 |   'behoimi' => 'touhou',
 9 |   'yandere' => 'touhou'
10 | }.freeze
11 | POOLS_MATRIX = {
12 |   'danbooru' => { 'id' => 364, 'name' => 'Nanoha/Fate doujin' },
13 |   'testbooru' => { 'id' => 1, 'name' => 'Pool 1' },
14 |   'konachan' => { 'id' => 4, 'name' => 'Clannad Wallpapers (Zoomlayer + Logo + Name)' },
15 |   'behoimi' => { 'id' => 13, 'name' => 'Rumpalicious!' },
16 |   'yandere' => { 'id' => 1184, 'name' => 'E☆2 Etsu Magazine vol. 22 2009-12' }
17 | }.freeze
18 | SPECIAL_MATRIX = {
19 |   'danbooru' => 'rating:safe',
20 |   'testbooru' => 'order:rank',
21 |   'konachan' => 'vote:3:opai',
22 |   'behoimi' => 'user:darkgray',
23 |   'yandere' => 'date:2016-05-14'
24 | }.freeze
25 | PER_PAGE = 3
26 | PAGES = 2
27 | POSTS_COUNT = PER_PAGE * PAGES
28 | 
29 | SAVER_MATRIX = {
30 |   'default saver' => '',
31 |   'curl' => ' -c',
32 |   'wget' => ' -w'
33 | }.freeze
34 | 
35 | BASE_DIR = './features/images'.freeze
36 | BASE_CMD = "ruby danbooru.rb -d #{BASE_DIR}".freeze
37 | 
38 | # Clean all files before and after each scenario
39 | Before do
40 |   clean_files
41 | end
42 | 
43 | After do
44 |   clean_files
45 | end
46 | 
# Removes every directory a scenario could have created under BASE_DIR:
# one per tag set, pool name and special tag, in both the tag-style and
# pool-style sanitized spellings.
def clean_files
  pool_names = POOLS_MATRIX.values.map { |pool| pool['name'] }
  names = TAGS_MATRIX.values + pool_names + SPECIAL_MATRIX.values

  names.each do |name|
    [false, true].each do |as_pool|
      path = File.join(BASE_DIR, sanitize_filename(name, pool: as_pool))
      FileUtils.rm_r(path) if Dir.exist?(path)
    end
  end
end
56 | 
# Returns the entries of +dir+ matching the glob +pattern+, as names
# relative to +dir+ (the glob runs with +dir+ as the working directory).
def list_files(dir, pattern = '*')
  Dir.chdir(dir) { Dir.glob(pattern) }
end
62 | 
# Builds the directory name expected for a tag string or pool name:
# the characters ? * / \ : become "_", and spaces become "_" for pools
# or "+" otherwise.
def sanitize_filename(filename, pool: false)
  space_replacement = pool ? '_' : '+'
  filename.gsub(%r{[?*/\\:]}, '_').gsub(' ', space_replacement)
end
68 | 


--------------------------------------------------------------------------------
/lib/behoimi.rb:
--------------------------------------------------------------------------------
 1 | class Behoimi < Booru
 2 |   API_BASE_URL = 'http://behoimi.org'
 3 |   PASSWORD_SALT = 'meganekko-heaven'
 4 |   API_KEY = false
 5 |   OLD_API = true
 6 |   REFERER = 'http://behoimi.org/post/show'
 7 | 
 8 |   def initialize(opts)
 9 |     super
10 |     @referer = REFERER
11 |   end
12 | 
13 |   def posts_by_tags(tags, page = 1, limit = LIMIT)
14 |     tags = clean_tags(tags)
15 |     posts_url = 'post/index.json'
16 |     do_request(posts_url, tags: tags, page: page, limit: limit)
17 |   end
18 | end
19 | 


--------------------------------------------------------------------------------
/lib/booru.rb:
--------------------------------------------------------------------------------
1 | $LOAD_PATH.unshift File.dirname(__FILE__)
2 | require 'booru/base'
3 | require 'booru/posts'
4 | require 'booru/pool'
5 | 


--------------------------------------------------------------------------------
/lib/booru/base.rb:
--------------------------------------------------------------------------------
  1 | require 'rubygems'
  2 | require 'json'
  3 | require 'nokogiri'
  4 | require 'net/http'
  5 | require 'net/https'
  6 | require 'open-uri'
  7 | require 'cgi'
  8 | require 'fileutils'
  9 | require 'optparse'
 10 | require 'digest/sha1'
 11 | require 'digest/md5'
 12 | 
 13 | class Booru
 14 |   API_BASE_URL = 'http://example.com'
 15 |   PASSWORD_SALT = nil
 16 |   API_KEY = true
 17 |   OLD_API = false
 18 |   USER_AGENT = 'drg/1.0.0'
 19 | 
 20 |   attr_accessor :options
 21 | 
 22 |   def initialize(opts)
 23 |     self.options = opts
 24 |     if options[:base_path]
 25 |       FileUtils.mkdir_p options[:base_path]
 26 |       Dir.chdir options[:base_path]
 27 |     end
 28 |     FileUtils.mkdir_p options[:storage] if options[:storage]
 29 |     @referer = self.class::API_BASE_URL
 30 |     options[:limits][:per_page] ||= 100
 31 |   end
 32 | 
 33 |   private
 34 | 
 35 |   def do_request(url, params = {}, method = :get, data = nil, format = :json, url_prepared = false, limit = 10)
 36 |     full_params = get_query_params(params)
 37 |     full_url = url_prepared ? url : prepare_url(url, full_params)
 38 |     uri = URI.join(self.class::API_BASE_URL, full_url)
 39 |     http_params = {
 40 |       'User-Agent' => USER_AGENT,
 41 |       'Referer' => @referer
 42 |     }
 43 | 
 44 |     http = Net::HTTP.new(uri.host, uri.port)
 45 |     if uri.scheme == 'https'
 46 |       http.use_ssl = true
 47 |       http.verify_mode = OpenSSL::SSL::VERIFY_NONE
 48 |     end
 49 |     case method
 50 |     when :get
 51 |       request = Net::HTTP::Get.new(uri.request_uri, http_params)
 52 |     when :post
 53 |       request = Net::HTTP::Post.new(uri.request_uri, http_params)
 54 |       request.content_type = 'application/x-www-form-urlencoded'
 55 |       request.body = "data=#{data}" if data
 56 |     end
 57 | 
 58 |     response = http.request(request)
 59 | 
 60 |     case response
 61 |     when Net::HTTPSuccess then parse_response(response, format)
 62 |     when Net::HTTPRedirection
 63 |       if limit.positive?
 64 |         do_request(response['location'], params, method, data, format, true, limit - 1)
 65 |       else
 66 |         warn 'Too many redirects.'
 67 |         exit 1
 68 |       end
 69 |     else response.value
 70 |     end
 71 |   end
 72 | 
 73 |   def prepare_url(url, full_params)
 74 |     [
 75 |       url,
 76 |       full_params.map { |key, val| "#{key}=#{CGI.escape(val.to_s)}" }.join('&')
 77 |     ].join('?').gsub('%2B', '+')
 78 |   end
 79 | 
 80 |   def get_query_params(params)
 81 |     unless options[:user].nil? || options[:password].nil?
 82 |       password_key = API_KEY ? :api_key : :password_hash
 83 |       params[:login] = options[:user]
 84 |       params[password_key] = get_password_hash(self.class::PASSWORD_SALT)
 85 |     end
 86 |     params
 87 |   end
 88 | 
 89 |   def get_password_hash(salt)
 90 |     if salt
 91 |       Digest::SHA1.hexdigest("#{salt}--#{options[:password]}--")
 92 |     else
 93 |       options[:password]
 94 |     end
 95 |   end
 96 | 
 97 |   def parse_response(response, format)
 98 |     response_ok = true
 99 | 
100 |     case format
101 |     when :json
102 |       response_hash = JSON.parse(response.body)
103 |       response_ok = false if response_hash.include?('success') && response_hash['success'] == false
104 |     when :xml
105 |       response_hash = Nokogiri::XML(response.body)
106 |       response_ok = false if response_hash.root['success'] == 'false'
107 |     else
108 |       raise 'Unknown format'
109 |     end
110 | 
111 |     return response_hash if response_ok
112 | 
113 |     raise response_hash
114 |   end
115 | 
116 |   def only_new_api
117 |     return unless self.class::OLD_API
118 | 
119 |     warn 'Supported only with a new API (danbooru.donmai.us)'
120 |     exit 1
121 |   end
122 | 
123 |   def only_old_api
124 |     return if self.class::OLD_API
125 | 
126 |     warn 'Supported only with an old API (not danbooru.donmai.us)'
127 |     exit 1
128 |   end
129 | 
130 |   def sanitize_filename(filename)
131 |     filename.gsub(%r{[?*/\\:]}, '_')
132 |   end
133 | end
134 | 


--------------------------------------------------------------------------------
/lib/booru/pool.rb:
--------------------------------------------------------------------------------
 1 | class Booru
 2 |   # Get pool by id
 3 |   # http://danbooru.donmai.us/pools/1.json
 4 |   # https://yande.re/pool/show.json?id=1&page=1
 5 |   # === Returns
 6 |   # Hash:: Pool data
 7 |   def pool(id, page = 1)
 8 |     if self.class::OLD_API
 9 |       do_request('pool/show.json', id: id, page: page)
10 |     else
11 |       do_request("pools/#{id}.json")
12 |     end
13 |   end
14 | 
15 |   def download_by_pool(id)
16 |     if self.class::OLD_API
17 |       data = pool(id)
18 |       pool_data = data['pool'] || data.reject { |k| k == 'posts' }
19 |     else
20 |       pool_data = pool(id)
21 |     end
22 |     puts "Pool name: #{pool_data['name']}."
23 |     pool_dir = sanitize_filename(pool_data['name'])
24 |     FileUtils.mkdir_p pool_dir
25 | 
26 |     bbs_path = File.join(options[:storage] || pool_dir, 'files.bbs')
27 |     bbs = File.new(bbs_path, 'a+')
28 |     old_bbs = bbs.read
29 | 
30 |     count = pool_data['post_count']
31 |     if count.zero?
32 |       puts 'No posts, nothing to do.'
33 |     else
34 |       num = 1
35 |       if self.class::OLD_API
36 |         pages = (count.to_f / data['posts'].count).ceil
37 |         1.upto(pages) do |page|
38 |           puts "Page #{page}/#{pages}:"
39 |           pool(id, page)['posts'].each do |post_data|
40 |             download_post(post_data, pool_dir, num, count, bbs, old_bbs)
41 |             num += 1
42 |           end
43 |         end
44 |       else
45 |         pool_data['post_ids'].each do |post_id|
46 |           post_data = post(post_id)
47 |           download_post(post_data, pool_dir, num, count, bbs, old_bbs)
48 |           num += 1
49 |         end
50 |       end
51 |     end
52 |   end
53 | end
54 | 


--------------------------------------------------------------------------------
/lib/booru/posts.rb:
--------------------------------------------------------------------------------
  1 | class Booru
  2 |   # Get post by id
  3 |   # Only new API
  4 |   # http://danbooru.donmai.us/posts/$id.json
  5 |   # === Returns
  6 |   # Hash:: Data of found post
  7 |   def post(id)
  8 |     only_new_api
  9 |     do_request("posts/#{id}.json")
 10 |   end
 11 | 
 12 |   # Get posts by tags
 13 |   # http://danbooru.donmai.us/posts.json?tags=touhou&page=1&limit=100
 14 |   # https://yande.re/post.json?tags=touhou&page=1&limit=100
 15 |   # === Returns
 16 |   # Array:: Hashes of found posts
 17 |   def posts_by_tags(tags, page = 1, limit = options[:limits][:per_page])
 18 |     tags = clean_tags(tags)
 19 |     posts_url = self.class::OLD_API ? 'post.json' : 'posts.json'
 20 |     do_request(posts_url, tags: tags, page: page, limit: limit)
 21 |   end
 22 | 
 23 |   # Get posts count by tag
 24 |   # http://danbooru.donmai.us/counts/posts.json?tags=touhou
 25 |   # https://yande.re/post/index.xml?tags=touhou&limit=1
 26 |   # === Returns
 27 |   # Integer:: Number of posts
 28 |   def posts_count_by_tag(tags)
 29 |     tags = clean_tags(tags)
 30 |     if self.class::OLD_API
 31 |       do_request('post/index.xml', { tags: tags, limit: 1 }, :get, nil, :xml).root['count'].to_i
 32 |     else
 33 |       do_request('counts/posts.json', tags: tags)['counts']['posts']
 34 |     end
 35 |   end
 36 | 
 37 |   def download_by_tags(tags)
 38 |     tags = clean_tags(tags)
 39 |     tags_dir = sanitize_filename(tags)
 40 |     puts "Tags are #{tags}."
 41 |     FileUtils.mkdir_p tags_dir
 42 | 
 43 |     bbs_path = File.join(options[:storage] || tags_dir, 'files.bbs')
 44 |     bbs = File.new(bbs_path, 'a+')
 45 |     old_bbs = bbs.read
 46 | 
 47 |     count = posts_count_by_tag(tags)
 48 |     if count.zero?
 49 |       puts 'No posts, nothing to do.'
 50 |     else
 51 |       pages = (count.to_f / options[:limits][:per_page]).ceil
 52 |       pages = options[:limits][:pages] if options[:limits][:pages] && options[:limits][:pages] < pages
 53 |       num = 1
 54 |       1.upto(pages) do |page|
 55 |         puts "Page #{page}/#{pages}:"
 56 |         posts_by_tags(tags, page, options[:limits][:per_page]).each do |post_data|
 57 |           download_post(post_data, tags_dir, num, count, bbs, old_bbs)
 58 |           num += 1
 59 |         end
 60 |       end
 61 |     end
 62 |   end
 63 | 
 64 |   private
 65 | 
 66 |   def download_post(post_data, target, num, count, bbs, old_bbs)
 67 |     exit if options[:limits][:posts] && options[:limits][:posts] < num
 68 | 
 69 |     # Prepare post data
 70 |     if post_data['file_url']
 71 |       url = get_url(post_data['file_url'])
 72 |       md5 = post_data['md5']
 73 |     else
 74 |       # url, md5 = get_data_from_html(post_data['id'])
 75 |       # return if url.nil?
 76 |       puts "File url is unknown for #{post_data['id']}."
 77 |       return
 78 |     end
 79 |     filename = get_filename(post_data, md5, url)
 80 |     tag_string = self.class::OLD_API ? post_data['tags'] : post_data['tag_string']
 81 | 
 82 |     path =
 83 |       if options[:storage]
 84 |         File.join(options[:storage], filename)
 85 |       else
 86 |         File.join(target, filename)
 87 |       end
 88 |     if File.exist?(path) && md5 == Digest::MD5.hexdigest(File.read(path))
 89 |       puts "File exists - #{path} (#{num}/#{count})"
 90 |     else
 91 |       puts "Saving #{path}... (#{num}/#{count})"
 92 |       download_with_tool(url, path)
 93 |       puts 'saved!'
 94 |     end
 95 |     FileUtils.ln_sf(File.join('..', path), File.join(target, filename)) if options[:storage]
 96 |     write_tags(filename, tag_string, bbs) unless /^#{filename}/.match?(old_bbs)
 97 |   end
 98 | 
 99 |   def get_url(file_url)
100 |     if URI::DEFAULT_PARSER.make_regexp.match?(file_url)
101 |       file_url
102 |     elsif file_url.start_with?(self.class::API_BASE_URL.gsub('http:', ''))
103 |       "http:#{file_url}"
104 |     else
105 |       self.class::API_BASE_URL + file_url
106 |     end
107 |   end
108 | 
109 |   def get_data_from_html(id)
110 |     print "File url is unknown for #{id}. Trying HTML... "
111 |     begin
112 |       html_data = URI.join(self.class::API_BASE_URL, '/posts/', id.to_s).open.read
113 |       nokogiri_data = Nokogiri::HTML(html_data)
114 |       file_url = nokogiri_data.css('section #image-container').first['data-file-url']
115 |       md5 = nokogiri_data.css('section #image-container').first['data-md5']
116 |       result = [get_url(file_url), md5]
117 |       puts 'success!'
118 |     rescue StandardError
119 |       puts 'fail. Giving up.'
120 |       result = [nil, nil]
121 |     end
122 |     result
123 |   end
124 | 
125 |   def get_filename(post_data, md5, url)
126 |     real_filename = CGI.unescape(File.basename(URI.parse(url).path))
127 |     ext = File.extname(real_filename)
128 | 
129 |     case options[:filename]
130 |     when :md5
131 |       md5 + ext
132 |     when :tags
133 |       tags_key = self.class::OLD_API ? 'tags' : 'tag_string'
134 |       post_data[tags_key] + ext
135 |     when :url
136 |       real_filename
137 |     else
138 |       post_data['id'].to_s + ext
139 |     end
140 |   end
141 | 
142 |   def download_with_tool(url, path)
143 |     case options[:downloader]
144 |     when :wget
145 |       `wget -nv "#{url}" -O "#{path}" --user-agent="#{USER_AGENT}" --referer="#{@referer}"`
146 |     when :curl
147 |       `curl -L -A "#{USER_AGENT}" -e "#{@referer}" --progress-bar -o "#{path}" "#{url}"`
148 |     else
149 |       File.open(path, 'wb') do |f|
150 |         data = URI.parse(url).open('rb', 'User-Agent' => USER_AGENT, 'Referer' => @referer).read
151 |         f.write(data)
152 |       end
153 |     end
154 |   end
155 | 
156 |   def write_tags(filename, tags, bbs)
157 |     bbs.puts "#{filename} - #{tags}"
158 |   end
159 | 
160 |   def clean_tags(tags)
161 |     tags.tr(' ', '+')
162 |   end
163 | end
164 | 


--------------------------------------------------------------------------------
/lib/danbooru.rb:
--------------------------------------------------------------------------------
# Client for danbooru.donmai.us.
# Only the base URL is set here; every other setting is inherited from Booru.
class Danbooru < Booru
  API_BASE_URL = 'https://danbooru.donmai.us'.freeze
end
4 | 


--------------------------------------------------------------------------------
/lib/konachan.rb:
--------------------------------------------------------------------------------
# Client for konachan.com.
class Konachan < Booru
  API_BASE_URL = 'http://konachan.com'.freeze
  # Salt passed to Booru#get_password_hash when credentials are sent.
  PASSWORD_SALT = 'So-I-Heard-You-Like-Mupkids-?'.freeze
  # NOTE(review): presumably means the site logs in with a password hash
  # rather than an API key - confirm against Booru#get_query_params.
  API_KEY = false
  # Selects the old-style endpoints (e.g. post.json, pool/show.json).
  OLD_API = true
end
7 | 


--------------------------------------------------------------------------------
/lib/testbooru.rb:
--------------------------------------------------------------------------------
# Client for testbooru.donmai.us.
# Behaves like Danbooru apart from the base URL.
class Testbooru < Danbooru
  API_BASE_URL = 'https://testbooru.donmai.us'.freeze
end
4 | 


--------------------------------------------------------------------------------
/lib/yandere.rb:
--------------------------------------------------------------------------------
# Client for yande.re.
class Yandere < Booru
  API_BASE_URL = 'https://yande.re'.freeze
  # Salt passed to Booru#get_password_hash when credentials are sent.
  PASSWORD_SALT = 'choujin-steiner'.freeze
  # NOTE(review): presumably means the site logs in with a password hash
  # rather than an API key - confirm against Booru#get_query_params.
  API_KEY = false
  # Selects the old-style endpoints (e.g. post.json, pool/show.json).
  OLD_API = true
end
7 | 


--------------------------------------------------------------------------------