├── .gitignore ├── .rubocop.yml ├── .rubocop_todo.yml ├── Gemfile ├── Gemfile.lock ├── LICENSE ├── README.md ├── Rakefile ├── danbooru.rb ├── features ├── download_images_by_pool.feature ├── download_images_by_special.feature ├── download_images_by_tag.feature ├── images │ └── .gitkeep ├── step_definitions │ ├── download_images_by_pool_steps.rb │ ├── download_images_by_special_steps.steps.rb │ ├── download_images_by_tag_steps.rb │ └── download_images_steps.rb └── support │ └── env.rb └── lib ├── behoimi.rb ├── booru.rb ├── booru ├── base.rb ├── pool.rb └── posts.rb ├── danbooru.rb ├── konachan.rb ├── testbooru.rb └── yandere.rb /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | images 3 | .bundle 4 | bundle 5 | vendor/bundle 6 | features/images 7 | !features/images/.gitkeep 8 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | require: 2 | - rubocop-performance 3 | - rubocop-rake 4 | - rubocop-rspec 5 | 6 | inherit_from: .rubocop_todo.yml 7 | 8 | AllCops: 9 | Exclude: 10 | - 'bundle/**/*' 11 | - 'vendor/**/*' 12 | - 'images/**/*' 13 | TargetRubyVersion: 2.7 14 | NewCops: enable 15 | 16 | Style/FrozenStringLiteralComment: 17 | Enabled: false 18 | -------------------------------------------------------------------------------- /.rubocop_todo.yml: -------------------------------------------------------------------------------- 1 | Layout/LineLength: 2 | Max: 120 3 | 4 | Metrics/AbcSize: 5 | Max: 41 6 | 7 | Metrics/ClassLength: 8 | Max: 128 9 | 10 | Metrics/CyclomaticComplexity: 11 | Max: 10 12 | 13 | Metrics/MethodLength: 14 | Max: 34 15 | 16 | Metrics/ParameterLists: 17 | Max: 7 18 | MaxOptionalParameters: 6 19 | 20 | Metrics/PerceivedComplexity: 21 | Max: 13 22 | 23 | Style/Documentation: 24 | Enabled: false 25 | 26 | Style/MutableConstant: 27 | Exclude: 28 | - 'lib/behoimi.rb' 
29 | - 'lib/booru/base.rb' 30 | - 'lib/danbooru.rb' 31 | - 'lib/testbooru.rb' 32 | - 'lib/konachan.rb' 33 | - 'lib/yandere.rb' 34 | 35 | Style/OptionalBooleanParameter: 36 | Exclude: 37 | - 'lib/booru/base.rb' 38 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'json' 4 | gem 'nokogiri' 5 | 6 | group :development, :test do 7 | # gem 'debug', platforms: %i[mri mingw x64_mingw] 8 | gem 'cucumber', require: false 9 | gem 'rake', require: false 10 | gem 'rspec', require: false 11 | gem 'rubocop', require: false 12 | gem 'rubocop-performance', require: false 13 | gem 'rubocop-rake', require: false 14 | gem 'rubocop-rspec', require: false 15 | gem 'ruby-filemagic', require: false 16 | end 17 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | ast (2.4.2) 5 | bigdecimal (3.1.8) 6 | builder (3.3.0) 7 | cucumber (9.2.0) 8 | builder (~> 3.2) 9 | cucumber-ci-environment (> 9, < 11) 10 | cucumber-core (> 13, < 14) 11 | cucumber-cucumber-expressions (~> 17.0) 12 | cucumber-gherkin (> 24, < 28) 13 | cucumber-html-formatter (> 20.3, < 22) 14 | cucumber-messages (> 19, < 25) 15 | diff-lcs (~> 1.5) 16 | mini_mime (~> 1.1) 17 | multi_test (~> 1.1) 18 | sys-uname (~> 1.2) 19 | cucumber-ci-environment (10.0.1) 20 | cucumber-core (13.0.3) 21 | cucumber-gherkin (>= 27, < 28) 22 | cucumber-messages (>= 20, < 23) 23 | cucumber-tag-expressions (> 5, < 7) 24 | cucumber-cucumber-expressions (17.1.0) 25 | bigdecimal 26 | cucumber-gherkin (27.0.0) 27 | cucumber-messages (>= 19.1.4, < 23) 28 | cucumber-html-formatter (21.7.0) 29 | cucumber-messages (> 19, < 27) 30 | cucumber-messages (22.0.0) 31 | cucumber-tag-expressions (6.1.0) 32 | diff-lcs 
(1.5.1) 33 | ffi (1.17.0) 34 | json (2.7.2) 35 | language_server-protocol (3.17.0.3) 36 | mini_mime (1.1.5) 37 | mini_portile2 (2.8.7) 38 | multi_test (1.1.0) 39 | nokogiri (1.16.7) 40 | mini_portile2 (~> 2.8.2) 41 | racc (~> 1.4) 42 | parallel (1.26.3) 43 | parser (3.3.5.0) 44 | ast (~> 2.4.1) 45 | racc 46 | racc (1.8.1) 47 | rainbow (3.1.1) 48 | rake (13.2.1) 49 | regexp_parser (2.9.2) 50 | rspec (3.13.0) 51 | rspec-core (~> 3.13.0) 52 | rspec-expectations (~> 3.13.0) 53 | rspec-mocks (~> 3.13.0) 54 | rspec-core (3.13.2) 55 | rspec-support (~> 3.13.0) 56 | rspec-expectations (3.13.3) 57 | diff-lcs (>= 1.2.0, < 2.0) 58 | rspec-support (~> 3.13.0) 59 | rspec-mocks (3.13.2) 60 | diff-lcs (>= 1.2.0, < 2.0) 61 | rspec-support (~> 3.13.0) 62 | rspec-support (3.13.1) 63 | rubocop (1.67.0) 64 | json (~> 2.3) 65 | language_server-protocol (>= 3.17.0) 66 | parallel (~> 1.10) 67 | parser (>= 3.3.0.2) 68 | rainbow (>= 2.2.2, < 4.0) 69 | regexp_parser (>= 2.4, < 3.0) 70 | rubocop-ast (>= 1.32.2, < 2.0) 71 | ruby-progressbar (~> 1.7) 72 | unicode-display_width (>= 2.4.0, < 3.0) 73 | rubocop-ast (1.32.3) 74 | parser (>= 3.3.1.0) 75 | rubocop-performance (1.22.1) 76 | rubocop (>= 1.48.1, < 2.0) 77 | rubocop-ast (>= 1.31.1, < 2.0) 78 | rubocop-rake (0.6.0) 79 | rubocop (~> 1.0) 80 | rubocop-rspec (3.1.0) 81 | rubocop (~> 1.61) 82 | ruby-filemagic (0.7.3) 83 | ruby-progressbar (1.13.0) 84 | sys-uname (1.3.0) 85 | ffi (~> 1.1) 86 | unicode-display_width (2.6.0) 87 | 88 | PLATFORMS 89 | ruby 90 | 91 | DEPENDENCIES 92 | cucumber 93 | json 94 | nokogiri 95 | rake 96 | rspec 97 | rubocop 98 | rubocop-performance 99 | rubocop-rake 100 | rubocop-rspec 101 | ruby-filemagic 102 | 103 | BUNDLED WITH 104 | 2.5.22 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Ivan Larionov 4 | 5 | Permission is hereby granted, 
free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | danbooru-ruby-grabber is a danbooru downloader — simple script which downloads images from danbooru.donmai.us, konachan.com, behoimi.org and yande.re. Support of any danbooru-powered site could be added easily. 4 | 5 | ## Usage 6 | 7 | ``` 8 | Usage: danbooru.rb [options] "tags" 9 | 10 | Target: 11 | -b, --board BOARDNAME Target board. Supported options: danbooru (default), konachan, behoimi, yandere 12 | -P, --pool POOL_ID Pool ID (tags will be ignored) 13 | 14 | Storage options: 15 | -s, --storage DIR Storage mode (all images in one dir and symlinks in tagged dirs) 16 | -d, --directory BASE_DIR Base directory to save images. By default it uses the same location as script 17 | -f, --filename PATTERN Filename pattern. 
Supported options: id (default), md5, tags, url (old default) 18 | Note: `-f tags` could miss some files due to filesystems' filename length limitation. 19 | 20 | Authentication: 21 | This is optional, but recommended since some boards block access without authentication. 22 | -u, --user USERNAME Username 23 | -p, --password PASSWORD Password (API Key for danbooru) 24 | 25 | Tools: 26 | Ruby's file saver is used by default. You can change it using this options. `wget` or `curl` binaries should be available. 27 | -w, --wget Download using wget 28 | -c, --curl Download using curl 29 | 30 | Limits: 31 | This option could be used multiple times with different limiters. 32 | -l, --limit LIMITER Limiters in the following format: limiter=number. Supported limiters: pages, posts, per_page 33 | 34 | Help: 35 | -h, --help Print a help message 36 | ``` 37 | 38 | ## Notes 39 | 40 | * To prevent duplicates files are stored using post id based filenames. You can change this behavior using `-f` option. 41 | 42 | * Using `-f tags` could miss some files due to filesystems' filename length limitation. 43 | 44 | * Always use `-u` and `-p` options with danbooru, because they block requests without login and password. 45 | 46 | * Number of tags you can use at the same time could be limited by board (for example danbooru limits basic accounts by 2 tags) 47 | 48 | * Images are stored in tag-named directory. 49 | 50 | * Script creates file named `files.bbs` with all tags of each image. 51 | 52 | ## Installation 53 | 54 | You need json and nokogiri gems to be installed. You can install them by this command: 55 | 56 | `gem install json nokogiri` 57 | 58 | Note: may be you need to use `sudo`. 59 | 60 | ## Bonus 61 | 62 | Have fun. 
# Author: Ivan "Xeron" Larionov
# E-mail: xeron.oskom@gmail.com
# Homepage: http://blog.xeron.me
# Version: 2.7
#
# Command-line entry point. Parses the options, instantiates the board class
# selected with -b and downloads either a pool (-P) or the posts matching the
# tags given as the first positional argument.

$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib')
require 'booru'
require 'danbooru'
require 'testbooru'
require 'konachan'
require 'behoimi'
require 'yandere'

options = {}
options[:limits] = {}
optparse = OptionParser.new do |opts| # rubocop:disable Metrics/BlockLength
  opts.banner = 'Usage: danbooru.rb [options] "tags"'

  opts.separator("\nTarget:")
  opts.on(
    '-b', '--board BOARDNAME',
    'Target board. Supported options: danbooru (default), konachan, behoimi, yandere'
  ) do |board|
    options[:board] = board.to_sym
  end
  opts.on(
    '-P', '--pool POOL_ID',
    'Pool ID (tags will be ignored)'
  ) do |pool|
    # Anchored with \A...\z so the whole argument must be a positive integer;
    # an unanchored match would accept "12abc" and silently use 12.
    if /\A[1-9][0-9]*\z/.match?(pool)
      options[:pool] = pool.to_i
    else
      warn "Wrong pool id: #{pool}. It should be a number greater than 0."
      exit 1
    end
  end

  opts.separator("\nStorage options:")
  opts.on(
    '-s', '--storage DIR',
    'Storage mode (all images in one dir and symlinks in tagged dirs)'
  ) do |dir|
    options[:storage] = dir
  end
  opts.on(
    '-d', '--directory BASE_DIR',
    'Base directory to save images. By default it uses the same location as script'
  ) do |base_path|
    options[:base_path] = base_path
  end
  opts.on(
    '-f', '--filename PATTERN',
    'Filename pattern. Supported options: id (default), md5, tags, url (old default)'
  ) do |filename|
    options[:filename] = filename.to_sym
  end
  opts.separator("\tNote: `-f tags` could miss some files due to filesystems' filename length limitation.")

  opts.separator("\nAuthentication:")
  opts.separator(' This is optional, but recommended since some boards block access without authentication.')
  opts.on('-u', '--user USERNAME', 'Username') do |user|
    options[:user] = user
  end
  opts.on('-p', '--password PASSWORD', 'Password (API Key for danbooru)') do |pass|
    options[:password] = pass
  end

  opts.separator("\nTools:")
  opts.separator(
    " Ruby's file saver is used by default. You can change it using this options. " \
    '`wget` or `curl` binaries should be available.'
  )
  opts.on('-w', '--wget', 'Download using wget') do
    options[:downloader] = :wget
  end
  opts.on('-c', '--curl', 'Download using curl') do
    options[:downloader] = :curl
  end

  opts.separator("\nLimits:")
  opts.separator(' This option could be used multiple times with different limiters.')
  opts.on(
    '-l', '--limit LIMITER',
    'Limiters in the following format: limiter=number. Supported limiters: pages, posts, per_page'
  ) do |limiter|
    # Whole-string match: "xpages=3foo" must not be accepted as pages=3.
    limiter_match = /\A(pages|posts|per_page)=([1-9][0-9]*)\z/.match(limiter)
    if limiter_match
      options[:limits][limiter_match[1].to_sym] = limiter_match[2].to_i
    else
      warn \
        "Wrong limiter: #{limiter}. It should be pages, posts or per_page and value should be a number greater than 0."
      exit 1
    end
  end

  opts.separator("\nHelp:")
  opts.on('-h', '--help', 'Print a help message') do
    puts opts
    exit
  end
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  # Do not keep going with half-parsed options: report the problem and stop.
  warn e.message
  warn optparse.help
  exit 1
end

if !options[:pool] && (ARGV.empty? || ARGV[0].empty?)
  puts optparse.help
else
  # Any unrecognised board name falls back to danbooru, the documented default.
  board =
    case options[:board]
    when :konachan
      Konachan.new(options)
    when :behoimi
      Behoimi.new(options)
    when :yandere
      Yandere.new(options)
    when :testbooru
      Testbooru.new(options)
    else
      Danbooru.new(options)
    end

  if options[:pool]
    board.download_by_pool(options[:pool])
  else
    board.download_by_tags(ARGV[0])
  end
end
| Scenario: Download images from behoimi 20 | Given I want to download images from behoimi pool and save them using url 21 | When I run script to download images using default saver 22 | Then I should see downloaded images by pool 23 | And I should see images in bbs file 24 | 25 | @yandere 26 | Scenario: Download images from yandere 27 | Given I want to download images from yandere pool and save them using url 28 | When I run script to download images using curl 29 | Then I should see downloaded images by pool 30 | And I should see images in bbs file 31 | -------------------------------------------------------------------------------- /features/download_images_by_special.feature: -------------------------------------------------------------------------------- 1 | @special_tag 2 | Feature: Download images by special 3 | 4 | @danbooru 5 | Scenario: Download images from danbooru 6 | Given I want to download images from danbooru with special tag and save them using default pattern 7 | When I run script to download images using default saver 8 | Then I should see downloaded images by tag 9 | And I should see images in bbs file 10 | 11 | @konachan 12 | Scenario: Download images from konachan 13 | Given I want to download images from konachan with special tag and save them using id 14 | When I run script to download images using wget 15 | Then I should see downloaded images by tag 16 | And I should see images in bbs file 17 | 18 | @behoimi 19 | Scenario: Download images from behoimi 20 | Given I want to download images from behoimi with special tag and save them using url 21 | When I run script to download images using default saver 22 | Then I should see downloaded images by tag 23 | And I should see images in bbs file 24 | 25 | @yandere 26 | Scenario: Download images from yandere 27 | Given I want to download images from yandere with special tag and save them using url 28 | When I run script to download images using curl 29 | Then I should see downloaded images by tag 
30 | And I should see images in bbs file 31 | -------------------------------------------------------------------------------- /features/download_images_by_tag.feature: -------------------------------------------------------------------------------- 1 | @tag 2 | Feature: Download images by tag 3 | 4 | @danbooru 5 | Scenario: Download images from danbooru 6 | Given I want to download images from danbooru and save them using default pattern 7 | When I run script to download images using default saver 8 | Then I should see downloaded images by tag 9 | And I should see images and tags in bbs file 10 | 11 | @konachan 12 | Scenario: Download images from konachan 13 | Given I want to download images from konachan and save them using id 14 | When I run script to download images using wget 15 | Then I should see downloaded images by tag 16 | And I should see images and tags in bbs file 17 | 18 | @behoimi 19 | Scenario: Download images from behoimi 20 | Given I want to download images from behoimi and save them using url 21 | When I run script to download images using default saver 22 | Then I should see downloaded images by tag 23 | And I should see images and tags in bbs file 24 | 25 | @yandere 26 | Scenario: Download images from yandere 27 | Given I want to download images from yandere and save them using url 28 | When I run script to download images using curl 29 | Then I should see downloaded images by tag 30 | And I should see images and tags in bbs file 31 | -------------------------------------------------------------------------------- /features/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xeron/danbooru-ruby-grabber/8c475bbeb1b9cee6ef8e7183e4c5d74c7b0d0225/features/images/.gitkeep -------------------------------------------------------------------------------- /features/step_definitions/download_images_by_pool_steps.rb: 
-------------------------------------------------------------------------------- 1 | Given(/^I want to download images from (\S+) pool and save them using (.*)$/) do |board, pattern| 2 | id = POOLS_MATRIX[board]['id'] 3 | @tags = POOLS_MATRIX[board]['name'] 4 | 5 | filenames = pattern == 'default pattern' ? '' : "-f #{pattern}" 6 | 7 | @cmd = "#{BASE_CMD} -l posts=#{POSTS_COUNT} #{filenames} -b #{board} -P #{id}" 8 | end 9 | -------------------------------------------------------------------------------- /features/step_definitions/download_images_by_special_steps.steps.rb: -------------------------------------------------------------------------------- 1 | Given(/^I want to download images from (\S+) with special tag and save them using (.*)$/) do |board, pattern| 2 | @tags = SPECIAL_MATRIX[board] 3 | 4 | filenames = pattern == 'default pattern' ? '' : "-f #{pattern}" 5 | 6 | @cmd = "#{BASE_CMD} -l posts=#{POSTS_COUNT} #{filenames} -b #{board} '#{@tags}'" 7 | end 8 | -------------------------------------------------------------------------------- /features/step_definitions/download_images_by_tag_steps.rb: -------------------------------------------------------------------------------- 1 | Given(/^I want to download images from (\S+) and save them using (.*)$/) do |board, pattern| 2 | @tags = TAGS_MATRIX[board] 3 | limits = [ 4 | "-l per_page=#{PER_PAGE} -l pages=#{PAGES}", 5 | "-l posts=#{POSTS_COUNT}" 6 | ] 7 | filenames = pattern == 'default pattern' ? 
# Runs the grabber command prepared by a Given step and works out how many
# images are expected on disk afterwards.
When(/^I run script to download images using (.*)$/) do |saver|
  @cmd += SAVER_MATRIX[saver]
  log(@cmd)

  output = `#{@cmd}`

  # A skipped post is reported as "File url is unknown for <id>." (or, with the
  # HTML fallback, "... Trying HTML... fail. Giving up."). Both end with a dot,
  # while a successful fallback ends with "success!", so match on the trailing
  # dot instead of the word "fail", which the plain message never contains.
  missed_count = output.split("\n").grep(/\AFile url is unknown for .*\.\z/).size
  @images_count = POSTS_COUNT - missed_count
end

# Checks that the target directory holds the expected number of files and that
# libmagic recognises each of them as a media file.
Then(/^I should see downloaded images by (.*)$/) do |source|
  fm = FileMagic.new
  @dir = File.join(BASE_DIR, sanitize_filename(@tags, pool: source == 'pool'))
  files = list_files(@dir)
  @images = files - ['files.bbs']

  expect(@images.size).to eq @images_count
  @images.each do |image|
    expect(fm.file(File.join(@dir, image))).to match(/image|Macromedia Flash|Zip archive data|WebM|MP4|MPEG/)
  end
end

# Checks that files.bbs has one line per downloaded image, each starting with
# the image file name.
Then(/^I should see images in bbs file$/) do
  bbs_file = File.join(@dir, 'files.bbs')
  bbs = File.read(bbs_file)

  expect(bbs.split("\n").size).to eq @images_count
  @images.each do |image|
    expect(bbs).to match Regexp.new("^#{Regexp.escape(image)}.*\\n")
  end
end
3 | 4 | TAGS_MATRIX = { 5 | 'danbooru' => 'touhou dress', 6 | 'testbooru' => 'tagme highres', 7 | 'konachan' => 'robotics;notes', 8 | 'behoimi' => 'touhou', 9 | 'yandere' => 'touhou' 10 | }.freeze 11 | POOLS_MATRIX = { 12 | 'danbooru' => { 'id' => 364, 'name' => 'Nanoha/Fate doujin' }, 13 | 'testbooru' => { 'id' => 1, 'name' => 'Pool 1' }, 14 | 'konachan' => { 'id' => 4, 'name' => 'Clannad Wallpapers (Zoomlayer + Logo + Name)' }, 15 | 'behoimi' => { 'id' => 13, 'name' => 'Rumpalicious!' }, 16 | 'yandere' => { 'id' => 1184, 'name' => 'E☆2 Etsu Magazine vol. 22 2009-12' } 17 | }.freeze 18 | SPECIAL_MATRIX = { 19 | 'danbooru' => 'rating:safe', 20 | 'testbooru' => 'order:rank', 21 | 'konachan' => 'vote:3:opai', 22 | 'behoimi' => 'user:darkgray', 23 | 'yandere' => 'date:2016-05-14' 24 | }.freeze 25 | PER_PAGE = 3 26 | PAGES = 2 27 | POSTS_COUNT = PER_PAGE * PAGES 28 | 29 | SAVER_MATRIX = { 30 | 'default saver' => '', 31 | 'curl' => ' -c', 32 | 'wget' => ' -w' 33 | }.freeze 34 | 35 | BASE_DIR = './features/images'.freeze 36 | BASE_CMD = "ruby danbooru.rb -d #{BASE_DIR}".freeze 37 | 38 | # Clean all files before and after each scenario 39 | Before do 40 | clean_files 41 | end 42 | 43 | After do 44 | clean_files 45 | end 46 | 47 | def clean_files 48 | pools_dirs = POOLS_MATRIX.values.map { |v| v['name'] } 49 | (TAGS_MATRIX.values + pools_dirs + SPECIAL_MATRIX.values).each do |dir| 50 | actual_dir1 = File.join(BASE_DIR, sanitize_filename(dir, pool: false)) 51 | actual_dir2 = File.join(BASE_DIR, sanitize_filename(dir, pool: true)) 52 | FileUtils.rm_r(actual_dir1) if Dir.exist?(actual_dir1) 53 | FileUtils.rm_r(actual_dir2) if Dir.exist?(actual_dir2) 54 | end 55 | end 56 | 57 | def list_files(dir, pattern = '*') 58 | Dir.chdir(dir) do 59 | Dir[pattern] 60 | end 61 | end 62 | 63 | def sanitize_filename(filename, pool: false) 64 | result = filename.gsub(%r{[?*/\\:]}, '_') 65 | space_sub = pool ? 
# Base client for danbooru-style image boards.
#
# Subclasses configure a board by overriding the constants below:
#   API_BASE_URL  - root url all API paths are resolved against
#   PASSWORD_SALT - when set, the password is sent as SHA1("<salt>--<password>--")
#   API_KEY       - true: credentials go in `api_key`; false: in `password_hash`
#   OLD_API       - true for boards that use the old API paths (see only_old_api)
class Booru
  API_BASE_URL = 'http://example.com'
  PASSWORD_SALT = nil
  API_KEY = true
  OLD_API = false
  USER_AGENT = 'drg/1.0.0'

  attr_accessor :options

  # opts:: Hash of command-line options (:base_path, :storage, :user,
  #        :password, :limits, ...). A missing :limits hash is created and
  #        :per_page defaults to 100.
  # Side effects: creates and chdirs into :base_path, creates :storage.
  def initialize(opts)
    self.options = opts
    options[:limits] ||= {}
    if options[:base_path]
      FileUtils.mkdir_p options[:base_path]
      Dir.chdir options[:base_path]
    end
    FileUtils.mkdir_p options[:storage] if options[:storage]
    @referer = self.class::API_BASE_URL
    options[:limits][:per_page] ||= 100
  end

  private

  # Performs an API request and returns the parsed response.
  #
  # url::          path relative to API_BASE_URL (or a ready url when url_prepared)
  # params::       query parameters; credentials are added automatically
  # method::       :get or :post
  # data::         optional payload sent as the `data` form field on :post
  # format::       :json or :xml, see parse_response
  # url_prepared:: true when url already carries its query string (redirects)
  # limit::        number of redirects still allowed
  #
  # Raises ArgumentError for an unsupported method and the Net::HTTP error
  # produced by response.value for non-success, non-redirect responses.
  # Exits the process when the redirect limit is exhausted.
  def do_request(url, params = {}, method = :get, data = nil, format = :json, url_prepared = false, limit = 10)
    full_params = get_query_params(params)
    full_url = url_prepared ? url : prepare_url(url, full_params)
    uri = URI.join(self.class::API_BASE_URL, full_url)
    http_params = {
      'User-Agent' => USER_AGENT,
      'Referer' => @referer
    }

    http = Net::HTTP.new(uri.host, uri.port)
    # Credentials travel in the query string, so the certificate must be
    # verified: Net::HTTP verifies peers by default once use_ssl is enabled.
    http.use_ssl = true if uri.scheme == 'https'

    response = http.request(build_request(method, uri, http_params, data))

    case response
    when Net::HTTPSuccess then parse_response(response, format)
    when Net::HTTPRedirection
      if limit.positive?
        do_request(response['location'], params, method, data, format, true, limit - 1)
      else
        warn 'Too many redirects.'
        exit 1
      end
    else response.value
    end
  end

  # Builds the Net::HTTP request object for the given method.
  def build_request(method, uri, headers, data)
    case method
    when :get
      Net::HTTP::Get.new(uri.request_uri, headers)
    when :post
      request = Net::HTTP::Post.new(uri.request_uri, headers)
      request.content_type = 'application/x-www-form-urlencoded'
      # NOTE(review): data is sent as-is, so callers are presumably expected to
      # pass an already form-encoded value - confirm before adding escaping.
      request.body = "data=#{data}" if data
      request
    else
      raise ArgumentError, "Unsupported HTTP method: #{method.inspect}"
    end
  end

  # Appends the escaped query string to url. A literal "+" in a value (the tag
  # separator produced by clean_tags) is kept instead of being sent as %2B.
  def prepare_url(url, full_params)
    return url if full_params.empty?

    [
      url,
      full_params.map { |key, val| "#{key}=#{CGI.escape(val.to_s)}" }.join('&')
    ].join('?').gsub('%2B', '+')
  end

  # Returns params extended with the credentials when both user and password
  # are configured. The caller's hash is left untouched.
  def get_query_params(params)
    return params if options[:user].nil? || options[:password].nil?

    # self.class:: so that a subclass overriding API_KEY is honoured; a bare
    # API_KEY would always resolve to Booru::API_KEY.
    password_key = self.class::API_KEY ? :api_key : :password_hash
    params.merge(
      { login: options[:user], password_key => get_password_hash(self.class::PASSWORD_SALT) }
    )
  end

  # Returns the salted SHA1 of the password, or the raw password (API key)
  # when the board defines no salt.
  def get_password_hash(salt)
    if salt
      Digest::SHA1.hexdigest("#{salt}--#{options[:password]}--")
    else
      options[:password]
    end
  end

  # Parses the response body as JSON (Hash/Array) or XML (Nokogiri document).
  #
  # Raises RuntimeError for an unknown format or when the API reports
  # success == false.
  def parse_response(response, format)
    case format
    when :json
      response_hash = JSON.parse(response.body)
      # A list of posts is an Array; only a Hash can carry the success flag.
      response_ok = !(response_hash.is_a?(Hash) && response_hash['success'] == false)
    when :xml
      response_hash = Nokogiri::XML(response.body)
      response_ok = response_hash.root&.[]('success') != 'false'
    else
      raise 'Unknown format'
    end

    return response_hash if response_ok

    # `raise` needs an exception or a message; passing the parsed body itself
    # would fail with TypeError and hide the API error.
    raise "API request failed: #{response_hash}"
  end

  # Aborts the process when the board uses the old API.
  def only_new_api
    return unless self.class::OLD_API

    warn 'Supported only with a new API (danbooru.donmai.us)'
    exit 1
  end

  # Aborts the process when the board uses the new API.
  def only_old_api
    return if self.class::OLD_API

    warn 'Supported only with an old API (not danbooru.donmai.us)'
    exit 1
  end

  # Replaces characters that are unsafe in file names (? * / \ :) with "_".
  def sanitize_filename(filename)
    filename.gsub(%r{[?*/\\:]}, '_')
  end
end
# Post related API calls and the download loop shared by all boards.
class Booru
  # Get post by id
  # Only new API
  # http://danbooru.donmai.us/posts/$id.json
  # === Returns
  # Hash:: Data of found post
  def post(id)
    only_new_api
    do_request("posts/#{id}.json")
  end

  # Get posts by tags
  # http://danbooru.donmai.us/posts.json?tags=touhou&page=1&limit=100
  # https://yande.re/post.json?tags=touhou&page=1&limit=100
  # === Returns
  # Array:: Hashes of found posts
  def posts_by_tags(tags, page = 1, limit = options[:limits][:per_page])
    tags = clean_tags(tags)
    posts_url = self.class::OLD_API ? 'post.json' : 'posts.json'
    do_request(posts_url, tags: tags, page: page, limit: limit)
  end

  # Get posts count by tag
  # http://danbooru.donmai.us/counts/posts.json?tags=touhou
  # https://yande.re/post/index.xml?tags=touhou&limit=1
  # === Returns
  # Integer:: Number of posts (0 when the board reports no count)
  def posts_count_by_tag(tags)
    tags = clean_tags(tags)
    if self.class::OLD_API
      do_request('post/index.xml', { tags: tags, limit: 1 }, :get, nil, :xml).root['count'].to_i
    else
      # to_i: a missing/null count must read as "no posts", not crash on nil.zero?
      do_request('counts/posts.json', tags: tags).dig('counts', 'posts').to_i
    end
  end

  # Downloads every post matching tags into a directory named after the tags
  # and records file name and tags of each new file in files.bbs.
  # Honours the :pages, :posts and :per_page limits from the options.
  def download_by_tags(tags)
    tags = clean_tags(tags)
    tags_dir = sanitize_filename(tags)
    puts "Tags are #{tags}."
    FileUtils.mkdir_p tags_dir

    bbs_path = File.join(options[:storage] || tags_dir, 'files.bbs')
    # Block form so the list is flushed and closed even when download_post
    # stops the run with `exit` or a download raises.
    File.open(bbs_path, 'a+') do |bbs|
      old_bbs = bbs.read

      count = posts_count_by_tag(tags)
      if count.zero?
        puts 'No posts, nothing to do.'
      else
        download_pages(tags, tags_dir, count, bbs, old_bbs)
      end
    end
  end

  private

  # Walks the result pages of tags and hands every post to download_post.
  def download_pages(tags, tags_dir, count, bbs, old_bbs)
    per_page = options[:limits][:per_page]
    pages = (count.to_f / per_page).ceil
    pages = options[:limits][:pages] if options[:limits][:pages] && options[:limits][:pages] < pages
    num = 1
    1.upto(pages) do |page|
      puts "Page #{page}/#{pages}:"
      posts_by_tags(tags, page, per_page).each do |post_data|
        download_post(post_data, tags_dir, num, count, bbs, old_bbs)
        num += 1
      end
    end
  end

  # Downloads a single post unless an identical file (same MD5) already exists.
  #
  # post_data:: Hash describing the post as returned by the API
  # target::    directory of the current tag/pool
  # num, count:: position of the post and total, used for progress output
  # bbs::       IO the "<file> - <tags>" line is appended to
  # old_bbs::   previous content of the bbs file, used to avoid duplicate lines
  #
  # Exits the process once the :posts limit is exceeded.
  def download_post(post_data, target, num, count, bbs, old_bbs)
    exit if options[:limits][:posts] && options[:limits][:posts] < num

    # Prepare post data
    if post_data['file_url']
      url = get_url(post_data['file_url'])
      md5 = post_data['md5']
    else
      # url, md5 = get_data_from_html(post_data['id'])
      # return if url.nil?
      puts "File url is unknown for #{post_data['id']}."
      return
    end
    filename = get_filename(post_data, md5, url)
    tag_string = self.class::OLD_API ? post_data['tags'] : post_data['tag_string']

    path = File.join(options[:storage] || target, filename)
    # Digest::MD5.file streams the file in binary mode.
    if File.exist?(path) && md5 == Digest::MD5.file(path).hexdigest
      puts "File exists - #{path} (#{num}/#{count})"
    else
      puts "Saving #{path}... (#{num}/#{count})"
      download_with_tool(url, path)
      puts 'saved!'
    end
    FileUtils.ln_sf(File.join('..', path), File.join(target, filename)) if options[:storage]

    # The file name is data, not a pattern: escape it, and match the full
    # "<file> - " prefix written by write_tags.
    already_listed = /^#{Regexp.escape(filename)} - /.match?(old_bbs)
    write_tags(filename, tag_string, bbs) unless already_listed
  end

  # Turns the file url reported by the API into an absolute url:
  # absolute urls are returned as-is, protocol-relative ones ("//host/path")
  # get the scheme of API_BASE_URL, anything else is appended to API_BASE_URL.
  def get_url(file_url)
    base_url = self.class::API_BASE_URL
    if URI::DEFAULT_PARSER.make_regexp.match?(file_url)
      file_url
    elsif file_url.start_with?('//')
      "#{URI.parse(base_url).scheme}:#{file_url}"
    else
      base_url + file_url
    end
  end

  # Fallback that scrapes the post's HTML page for the file url and md5.
  # Currently not called (see the commented-out lines in download_post).
  # === Returns
  # Array:: [url, md5], or [nil, nil] when anything goes wrong
  def get_data_from_html(id)
    print "File url is unknown for #{id}. Trying HTML... "
    begin
      html_data = URI.join(self.class::API_BASE_URL, '/posts/', id.to_s).open.read
      container = Nokogiri::HTML(html_data).css('section #image-container').first
      result = [get_url(container['data-file-url']), container['data-md5']]
      puts 'success!'
    rescue StandardError
      puts 'fail. Giving up.'
      result = [nil, nil]
    end
    result
  end

  # Builds the local file name according to the :filename option
  # (:md5, :tags, :url; anything else means the post id).
  def get_filename(post_data, md5, url)
    real_filename = CGI.unescape(File.basename(URI.parse(url).path))
    ext = File.extname(real_filename)

    case options[:filename]
    when :md5
      md5 + ext
    when :tags
      tags_key = self.class::OLD_API ? 'tags' : 'tag_string'
      # Tags may contain "/" (and other unsafe characters) which would point
      # into non-existent sub directories, so apply the directory-name rules.
      sanitize_filename(post_data[tags_key]) + ext
    when :url
      real_filename
    else
      post_data['id'].to_s + ext
    end
  end

  # Saves url to path with wget, curl or Ruby's own open-uri, depending on
  # the :downloader option.
  def download_with_tool(url, path)
    case options[:downloader]
    when :wget
      # Argument-list form of system: nothing is interpreted by a shell, so
      # quotes or "$(...)" inside a remote-derived url/path are harmless.
      system('wget', '-nv', url, '-O', path, "--user-agent=#{USER_AGENT}", "--referer=#{@referer}")
    when :curl
      system('curl', '-L', '-A', USER_AGENT, '-e', @referer, '--progress-bar', '-o', path, url)
    else
      # Open the remote side first so a failed request does not leave an empty
      # file behind; the block forms close both ends.
      URI.parse(url).open('rb', 'User-Agent' => USER_AGENT, 'Referer' => @referer) do |remote|
        File.open(path, 'wb') { |file| IO.copy_stream(remote, file) }
      end
    end
  end

  # Appends the "<file> - <tags>" line for a downloaded file.
  def write_tags(filename, tags, bbs)
    bbs.puts "#{filename} - #{tags}"
  end

  # Tags are separated by "+" in requests and directory names.
  def clean_tags(tags)
    tags.tr(' ', '+')
  end
end
4 | API_KEY = false 5 | OLD_API = true 6 | end 7 | -------------------------------------------------------------------------------- /lib/testbooru.rb: -------------------------------------------------------------------------------- 1 | class Testbooru < Danbooru 2 | API_BASE_URL = 'https://testbooru.donmai.us' 3 | end 4 | -------------------------------------------------------------------------------- /lib/yandere.rb: -------------------------------------------------------------------------------- 1 | class Yandere < Booru 2 | API_BASE_URL = 'https://yande.re' 3 | PASSWORD_SALT = 'choujin-steiner' 4 | API_KEY = false 5 | OLD_API = true 6 | end 7 | --------------------------------------------------------------------------------