├── .gitignore ├── Gemfile ├── ImageScraper.gemspec ├── LICENSE.txt ├── README.md ├── Rakefile └── lib ├── ImageScraper └── version.rb └── image_scraper.rb /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | *.bundle 11 | *.so 12 | *.o 13 | *.a 14 | mkmf.log 15 | *.gem 16 | *.swp 17 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in ImageScraper.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /ImageScraper.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'ImageScraper/version' 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "ImageScraper" 8 | spec.version = ImageScraper::VERSION 9 | spec.authors = ["Sam Radhakrishnan"] 10 | spec.email = ["sk09idm@gmail.com"] 11 | spec.summary = %q{A gem to download images from url} 12 | spec.description = %q{A gem to download all the images at a particular URL} 13 | spec.homepage = "https://github.com/sam09/ImageScraper" 14 | spec.license = "MIT" 15 | 16 | spec.files = `git ls-files -z`.split("\x0") 17 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 18 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 19 | spec.require_paths = ["lib"] 20 | 21 | spec.add_development_dependency "bundler", "~> 1.7" 22 | spec.add_development_dependency "rake", "~> 10.0" 23 | spec.add_development_dependency "open-uri" 24 | spec.add_development_dependency "nokogiri" 25 | spec.add_development_dependency "rubygems" 26 | end 27 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Sam Radhakrishnan 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ImageScraper 2 | 3 | A gem which downloads all the images from a given url. 4 | 5 | ## Installation 6 | 7 | Add this line to your application's Gemfile: 8 | 9 | ```ruby 10 | gem 'ImageScraper' 11 | ``` 12 | 13 | And then execute: 14 | 15 | $ bundle 16 | 17 | Or install it yourself as: 18 | 19 | $ gem install ImageScraper 20 | 21 | ## Usage 22 | 23 | * Use the gem in your scrpit by adding the following line in your code 24 | 25 | `require 'ImageScraper'` 26 | * To download all images at a given url 27 | 28 | `ImageScraper.get_all_images(url)` 29 | 30 | ## Contributing 31 | 32 | 1. Fork it ( https://github.com/sam09/ImageScraper/fork ) 33 | 2. Create your feature branch (`git checkout -b my-new-feature`) 34 | 3. Commit your changes (`git commit -am 'Add some feature'`) 35 | 4. Push to the branch (`git push origin my-new-feature`) 36 | 5. Create a new Pull Request 37 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | 3 | -------------------------------------------------------------------------------- /lib/ImageScraper/version.rb: -------------------------------------------------------------------------------- 1 | module ImageScraper 2 | VERSION = "0.0.2" 3 | end 4 | -------------------------------------------------------------------------------- /lib/image_scraper.rb: -------------------------------------------------------------------------------- 1 | require "ImageScraper/version" 2 | require "nokogiri" 3 | require "open-uri" 4 | 5 | module ImageScraper 6 | 7 | 8 | class Scraper 9 | #The constructor 10 | def initialize(url, folder = "Images") 11 | @url = url 12 | @folder = folder 13 | end 14 | 15 | #A function to get HTML of page at a url 16 | def get_page() 17 | @page = Nokogiri::HTML(open(@url).read) 18 | end 19 | 20 | 21 | def get_new_url() 22 | new_url = /([a-z0-9]|_)+.(html)|(php)$/.match(@url) 23 | puts new_url.to_s 24 | n = new_url.to_s 25 | return @url.slice! n 26 | end 27 | 28 | #A method to get image links 29 | def get_img_links() 30 | img = @page.css('img') 31 | @imgLinks = Array.new 32 | new_url = self.get_new_url() 33 | 34 | if @url[@url.length-1] != '/' 35 | @url += "/" 36 | end 37 | print new_url 38 | 39 | img.each do |i| 40 | @imgLinks.push(@url+i["src"]) 41 | end 42 | end 43 | 44 | #A method to download images 45 | def download() 46 | get_page 47 | puts "Page Found" 48 | get_img_names 49 | get_img_links 50 | len = @imgLinks.length 51 | a = @imgLinks 52 | files = @files 53 | len.times do |f| 54 | puts "#{a[f]} found" 55 | File.open(files[f], "w") do |fo| 56 | fo.write open(a[f]).read 57 | end 58 | puts "#{files[f]} downloaded" 59 | end 60 | end 61 | 62 | #A method to get all image names 63 | def get_img_names() 64 | if not File.exists?(@folder) 65 | Dir.mkdir(@folder) 66 | puts "#{@folder} Directory Created" 67 | end 68 | 69 | @files = Array.new 70 | img = @page.css('img') 71 | img.each do |i| 72 | nodes = i["src"].split("/") 73 | @files.push(@folder + "/" + nodes[nodes.length - 1]) 74 | end 75 | end 76 | end 77 | 78 | #A method to download all files at a given url 79 | def self.get_all_images(url) 80 | s = Scraper.new(url) 81 | s.download() 82 | end 83 | end 84 | --------------------------------------------------------------------------------