├── .gitignore
├── Gemfile
├── ImageScraper.gemspec
├── LICENSE.txt
├── README.md
├── Rakefile
└── lib
    ├── ImageScraper
        └── version.rb
    └── image_scraper.rb


/.gitignore:
--------------------------------------------------------------------------------
 1 | /.bundle/
 2 | /.yardoc
 3 | /Gemfile.lock
 4 | /_yardoc/
 5 | /coverage/
 6 | /doc/
 7 | /pkg/
 8 | /spec/reports/
 9 | /tmp/
10 | *.bundle
11 | *.so
12 | *.o
13 | *.a
14 | mkmf.log
15 | *.gem
16 | *.swp
17 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
# Resolve every gem from the public RubyGems index.
source "https://rubygems.org"

# The dependency list lives in ImageScraper.gemspec; load it from there.
gemspec
5 | 


--------------------------------------------------------------------------------
/ImageScraper.gemspec:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | lib = File.expand_path('../lib', __FILE__)
 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 4 | require 'ImageScraper/version'
 5 | 
 6 | Gem::Specification.new do |spec|
 7 |   spec.name          = "ImageScraper"
 8 |   spec.version       = ImageScraper::VERSION
 9 |   spec.authors       = ["Sam Radhakrishnan"]
10 |   spec.email         = ["sk09idm@gmail.com"]
11 |   spec.summary       = %q{A gem to download images from url}
12 |   spec.description   = %q{A gem to download all the images at a particular URL}
13 |   spec.homepage      = "https://github.com/sam09/ImageScraper"
14 |   spec.license       = "MIT"
15 | 
16 |   spec.files         = `git ls-files -z`.split("\x0")
17 |   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18 |   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
19 |   spec.require_paths = ["lib"]
20 | 
21 |   spec.add_development_dependency "bundler", "~> 1.7"
22 |   spec.add_development_dependency "rake", "~> 10.0"
23 |   spec.add_development_dependency "open-uri"
24 |   spec.add_development_dependency "nokogiri"
25 |   spec.add_development_dependency "rubygems"
26 | end
27 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Sam Radhakrishnan
 2 | 
 3 | MIT License
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ImageScraper
 2 | 
 3 | A gem which downloads all the images from a given url.
 4 | 
 5 | ## Installation
 6 | 
 7 | Add this line to your application's Gemfile:
 8 | 
 9 | ```ruby
10 | gem 'ImageScraper'
11 | ```
12 | 
13 | And then execute:
14 | 
15 |     $ bundle
16 | 
17 | Or install it yourself as:
18 | 
19 |     $ gem install ImageScraper
20 | 
21 | ## Usage
22 | 
23 | * Use the gem in your script by adding the following line to your code:
24 | 
25 |      `require 'image_scraper'`
26 | * To download all images at a given url
27 |      
28 |      `ImageScraper.get_all_images(url)`
29 | 
30 | ## Contributing
31 | 
32 | 1. Fork it ( https://github.com/sam09/ImageScraper/fork )
33 | 2. Create your feature branch (`git checkout -b my-new-feature`)
34 | 3. Commit your changes (`git commit -am 'Add some feature'`)
35 | 4. Push to the branch (`git push origin my-new-feature`)
36 | 5. Create a new Pull Request
37 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler/gem_tasks"
2 | 
3 | 


--------------------------------------------------------------------------------
/lib/ImageScraper/version.rb:
--------------------------------------------------------------------------------
module ImageScraper
  # Release number of the gem; ImageScraper.gemspec reads it as spec.version.
  # Frozen so the shared constant cannot be modified in place.
  VERSION = "0.0.2".freeze
end
4 | 


--------------------------------------------------------------------------------
/lib/image_scraper.rb:
--------------------------------------------------------------------------------
 1 | require "ImageScraper/version"
 2 | require "nokogiri"
 3 | require "open-uri"
 4 | 
 5 | module ImageScraper
 6 | 
 7 | 
 8 |   class Scraper
 9 |     #The constructor
10 |     def initialize(url, folder = "Images")
11 |       @url = url
12 |       @folder = folder
13 |     end
14 | 
15 |     #A function to get HTML of page at a url
16 |     def get_page()
17 |       @page = Nokogiri::HTML(open(@url).read)
18 |     end
19 | 
20 | 
21 |     def get_new_url()
22 |       new_url = /([a-z0-9]|_)+.(html)|(php)$/.match(@url)
23 |       puts new_url.to_s
24 |       n = new_url.to_s
25 |       return @url.slice! n
26 |     end
27 | 
28 |     #A method to get image links
29 |     def get_img_links()
30 |       img = @page.css('img')
31 |       @imgLinks = Array.new
32 |       new_url = self.get_new_url()
33 | 		
34 |       if @url[@url.length-1] != '/'
35 |         @url += "/"
36 |       end
37 |       print new_url
38 | 
39 |       img.each do |i|
40 |         @imgLinks.push(@url+i["src"])
41 |       end
42 |     end
43 | 
44 |     #A method to download images
45 |     def download()
46 |       get_page
47 |       puts "Page Found"
48 |       get_img_names
49 |       get_img_links
50 |       len = @imgLinks.length
51 |       a = @imgLinks
52 |       files = @files
53 |       len.times do |f|
54 |         puts "#{a[f]} found"
55 |         File.open(files[f], "w") do |fo|	
56 |           fo.write open(a[f]).read
57 | 		end
58 |         puts "#{files[f]} downloaded"
59 |       end
60 |     end
61 | 
62 |     #A method to get all image names
63 |     def get_img_names()
64 |       if not File.exists?(@folder)
65 |         Dir.mkdir(@folder)
66 |         puts "#{@folder} Directory Created"
67 |       end
68 | 	  
69 |       @files = Array.new
70 |       img = @page.css('img')
71 |       img.each do |i|
72 |         nodes = i["src"].split("/")
73 |         @files.push(@folder + "/" + nodes[nodes.length - 1])
74 |       end
75 |     end
76 |   end
77 | 
78 |   #A method to download all files at a given url
79 |   def self.get_all_images(url)
80 |     s = Scraper.new(url)
81 |     s.download()
82 |   end
83 | end
84 | 


--------------------------------------------------------------------------------