├── lib ├── webshot │ ├── version.rb │ ├── errors.rb │ └── screenshot.rb └── webshot.rb ├── CONTRIBUTORS ├── Gemfile ├── test ├── test_helper.rb └── webshot_test.rb ├── .travis.yml ├── .gitignore ├── Rakefile ├── LICENSE.txt ├── webshot.gemspec └── README.md /lib/webshot/version.rb: -------------------------------------------------------------------------------- 1 | module Webshot 2 | VERSION = '0.1.2' 3 | end 4 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Alex Avoyants 2 | Nick Kezhaya 3 | Rafa García 4 | Sage Ross 5 | -------------------------------------------------------------------------------- /lib/webshot/errors.rb: -------------------------------------------------------------------------------- 1 | module Webshot 2 | class WebshotError < RuntimeError; end 3 | end 4 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in webshot.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'webshot' 3 | require 'minitest/autorun' 4 | require 'minitest/unit' 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | cache: bundler 3 | before_install: 4 | - gem install bundler 5 | rvm: 6 | - 2.5 7 | - 2.6 8 | - 2.7 9 | matrix: 10 | fast_finish: true 11 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | .ruby-version 19 | .ruby-gemset 20 | test/data/* 21 | 22 | # Vim 23 | *.swp 24 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | require "bundler/gem_tasks" 3 | require "rake/testtask" 4 | require "rdoc/task" 5 | 6 | desc "Default: run tests." 7 | task :default => :test 8 | 9 | 10 | desc "Run webshot unit tests." 11 | Rake::TestTask.new do |t| 12 | t.libs << "test" 13 | t.libs << "lib" 14 | t.test_files = Dir[ "test/*_test.rb" ] 15 | t.verbose = true 16 | end 17 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Vitalie Cherpec 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /webshot.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'webshot/version' 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "webshot" 8 | spec.version = Webshot::VERSION 9 | spec.authors = ["Vitalie Cherpec"] 10 | spec.email = ["vitalie@penguin.ro"] 11 | spec.description = %q{Captures a web page as a screenshot using Poltergeist, Capybara and PhantomJS} 12 | spec.summary = %q{Captures a web page as a screenshot} 13 | spec.homepage = "https://github.com/vitalie/webshot" 14 | spec.license = "MIT" 15 | 16 | spec.files = `git ls-files`.split($/) 17 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 18 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 19 | spec.require_paths = ["lib"] 20 | 21 | spec.add_development_dependency "rake", "~> 13" 22 | spec.add_development_dependency "minitest", "~> 5.14" 23 | spec.add_development_dependency "gem-release", "~> 2.1" 24 | spec.add_development_dependency "bump", "~> 0.9" 25 | 26 | spec.add_dependency "activesupport", "~> 5" 27 | spec.add_dependency "poltergeist", [">= 1.12.0", "<= 1.18.1"] 28 | spec.add_dependency "faye-websocket", "~> 0.11.0" 29 | spec.add_dependency "mini_magick", "~> 4.9" 30 | end 31 | 
-------------------------------------------------------------------------------- /lib/webshot.rb: -------------------------------------------------------------------------------- 1 | require "mini_magick" 2 | require "capybara/dsl" 3 | require "capybara/poltergeist" 4 | require "active_support" 5 | require "active_support/core_ext" 6 | require "webshot/version" 7 | require "webshot/errors" 8 | require "webshot/screenshot" 9 | 10 | module Webshot 11 | 12 | ## Browser settings 13 | # Width 14 | mattr_accessor :width 15 | @@width = 1024 16 | 17 | # Height 18 | mattr_accessor :height 19 | @@height = 768 20 | 21 | # User agent 22 | mattr_accessor :user_agent 23 | @@user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.43 Safari/537.31" 24 | 25 | # Customize settings: yields the Webshot module so the caller can assign width, height and user_agent 26 | def self.setup 27 | yield self 28 | end 29 | 30 | # Capybara setup: disables the rack server, registers the :poltergeist driver and selects it as the current driver 31 | def self.capybara_setup! 32 | # By default Capybara will try to boot a rack application 33 | # automatically.
You might want to switch off Capybara's 34 | # rack server if you are running against a remote application 35 | Capybara.run_server = false 36 | Capybara.register_driver :poltergeist do |app| 37 | Capybara::Poltergeist::Driver.new(app, { 38 | # Do not re-raise JavaScript errors in Ruby (js_errors is disabled) 39 | js_errors: false, 40 | # Additional command line options for PhantomJS 41 | phantomjs_options: ['--ignore-ssl-errors=yes', '--ssl-protocol=any'] 42 | }) 43 | end 44 | Capybara.current_driver = :poltergeist 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /test/webshot_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | class WebshotTest < Minitest::Test 4 | DATA_DIR = File.expand_path(File.dirname(__FILE__) + "/data") 5 | 6 | def setup 7 | FileUtils.mkdir_p(DATA_DIR) unless File.directory?(DATA_DIR) 8 | @webshot = Webshot::Screenshot.instance 9 | end 10 | 11 | def test_http 12 | %w(www.yahoo.com).each do |name| 13 | output = thumb(name) 14 | File.delete output if File.exist? output 15 | @webshot.capture "http://#{name}/", output 16 | assert File.exist? output 17 | end 18 | end 19 | 20 | def test_https 21 | %w(github.com).each do |name| 22 | output = thumb(name) 23 | File.delete output if File.exist? output 24 | @webshot.capture "https://#{name}/", output 25 | assert File.exist? output 26 | end 27 | end 28 | 29 | def test_invalid_url 30 | %w(nxdomain).each do |name| 31 | assert_raises Webshot::WebshotError do 32 | @webshot.capture "http://#{name}/", thumb(name) 33 | end 34 | end 35 | end 36 | 37 | def test_mini_magick 38 | %w(www.yahoo.com).each do |name| 39 | output = thumb(name) 40 | File.delete output if File.exist? output 41 | 42 | # Customize MiniMagick options 43 | result = @webshot.capture("http://#{name}/", output) do |thumb| 44 | thumb.combine_options do |c| 45 | c.thumbnail "100x90" 46 | end 47 | end 48 | assert File.exist?
output 49 | assert result.respond_to? :to_blob 50 | end 51 | end 52 | 53 | def test_allowed_404_status 54 | %w(en.wikipedia.org/wiki/THIS_PAGE_DOES_NOT_EXIST).each do |name| 55 | output = thumb('404_example') 56 | File.delete output if File.exist? output 57 | 58 | @webshot.capture "https://#{name}/", output, allowed_status_codes: [404] 59 | assert File.exist? output 60 | end 61 | end 62 | 63 | protected 64 | 65 | def thumb(name) 66 | File.join(DATA_DIR, "#{name}.png") 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Webshot 2 | 3 | Captures a web page as a screenshot using Poltergeist, Capybara and PhantomJS. 4 | 5 | - [![Build Status](https://travis-ci.org/vitalie/webshot.svg?branch=master)](https://travis-ci.org/vitalie/webshot) 6 | 7 | ## Installation 8 | 9 | Download and install [PhantomJS](http://phantomjs.org/releases.html) version 2.X, 10 | add the directory containing the binary to your PATH. 
11 | 12 | Add the `webshot` gem to your Gemfile: 13 | 14 | gem "webshot" 15 | 16 | And then execute: 17 | 18 | $ bundle 19 | 20 | Or install it yourself as: 21 | 22 | $ gem install webshot 23 | 24 | ## Usage 25 | 26 | ```rb 27 | # Setup Capybara 28 | ws = Webshot::Screenshot.instance 29 | 30 | # Capture Google's home page 31 | ws.capture "http://www.google.com/", "google.png" 32 | 33 | # Customize thumbnail 34 | ws.capture "http://www.google.com/", "google.png", width: 100, height: 90, quality: 85 35 | 36 | # Specify only width, height will be computed according to page's height 37 | ws.capture "http://www.google.com/", "google.png", width: 1024 38 | 39 | # Specify an array of additional HTTP status codes to accept, 40 | # beyond normal success codes like 200 or 302 41 | ws.capture "http://www.google.com/foo", "google_404.png", allowed_status_codes: [404] 42 | 43 | # Customize thumbnail generation (MiniMagick) 44 | # see: https://github.com/minimagick/minimagick 45 | ws.capture("http://www.google.com/", "google.png") do |magick| 46 | magick.combine_options do |c| 47 | c.thumbnail "100x" 48 | c.background "white" 49 | c.extent "100x90" 50 | c.gravity "north" 51 | c.quality 85 52 | end 53 | end 54 | 55 | ``` 56 | 57 | You can wait for some time after visiting the page before capturing the screenshot. 58 | 59 | ```rb 60 | # Timeout in seconds 61 | ws.capture 'http://www.google.com/', 'google.png', timeout: 2 62 | 63 | ``` 64 | 65 | You can log in before capturing the screenshot. 66 | 67 | ```rb 68 | ws.start_session do 69 | visit 'https://github.com/login' 70 | fill_in 'Username or Email', :with => 'user@example.com' 71 | fill_in 'Password', :with => 'password' 72 | click_button 'Sign in' 73 | end.capture 'https://github.com/username/', 'github.png' 74 | 75 | ``` 76 | 77 | ## Scaling 78 | 79 | It's not recommended to start multiple PhantomJS processes concurrently. 80 | You should serialize requests, treat the process as unreliable and 81 | monitor it with daemontools, god, monit, etc.
require "singleton"
require "tempfile"

module Webshot
  # Singleton that drives a Capybara session to open a page, save a
  # screenshot through the session driver and post-process it with MiniMagick.
  class Screenshot
    include Capybara::DSL
    include Singleton

    # Runs Webshot.capybara_setup!, then applies the browser window size and
    # the User-Agent header to the current driver.
    #
    # @param opts [Hash] optional +:width+, +:height+ and +:user_agent+
    #   overrides; missing keys fall back to the Webshot module settings.
    #   (Singleton's +instance+ calls +new+ without arguments, so the module
    #   settings are what normally applies.)
    def initialize(opts = {})
      Webshot.capybara_setup!
      width      = opts.fetch(:width, Webshot.width)
      height     = opts.fetch(:height, Webshot.height)
      user_agent = opts.fetch(:user_agent, Webshot.user_agent)

      # Browser settings
      page.driver.resize(width, height)
      page.driver.headers = { "User-Agent" => user_agent }
    end

    # Resets all Capybara sessions and, when a block is given, evaluates it in
    # the context of the current session (e.g. to log in). The next call to
    # #capture then skips its own session reset so that state is kept.
    #
    # @return [Screenshot] self, so that +capture+ can be chained
    def start_session(&block)
      Capybara.reset_sessions!
      Capybara.current_session.instance_eval(&block) if block_given?
      @session_started = true
      self
    end

    # Tells whether a response status allows the capture to proceed.
    #
    # @param status_code [Integer] status reported by the driver
    # @param allowed_status_codes [#include?] extra codes to accept
    # @return [Boolean] true for 200, for any 3xx code, or for a code
    #   contained in +allowed_status_codes+
    def valid_status_code?(status_code, allowed_status_codes)
      status_code == 200 ||
        status_code / 100 == 3 ||
        allowed_status_codes.include?(status_code)
    end

    # Captures a screenshot of +url+ saving it to +path+.
    #
    # When a block is given it receives the MiniMagick image and is solely
    # responsible for transforming it; otherwise a default thumbnail is built
    # from the +:width+, +:height+, +:gravity+ and +:quality+ options.
    #
    # @param url [String] page to visit
    # @param path [String] destination file for the processed image
    # @param opts [Hash] options:
    #   +:width+ (120), +:height+ (90 -- also when only +:width+ is given),
    #   +:gravity+ ("north"), +:quality+ (85),
    #   +:full+ (true) and +:selector+ (nil), both forwarded to the driver's
    #   +save_screenshot+,
    #   +:allowed_status_codes+ ([]) -- extra status codes to accept; a single
    #   code or nil is tolerated,
    #   +:timeout+ -- seconds to sleep after visiting the page.
    # @yield [thumb] optional custom MiniMagick processing
    # @return [MiniMagick::Image] the processed image that was written
    # @raise [WebshotError] when the status code is not acceptable, or when
    #   one of the rescued Poltergeist/connection errors occurs
    def capture(url, path, opts = {})
      # Default settings
      width    = opts.fetch(:width, 120)
      height   = opts.fetch(:height, 90)
      gravity  = opts.fetch(:gravity, "north")
      quality  = opts.fetch(:quality, 85)
      full     = opts.fetch(:full, true)
      selector = opts.fetch(:selector, nil)
      # Array() lets callers pass nil or a single code without crashing.
      allowed_status_codes = Array(opts.fetch(:allowed_status_codes, []))

      # Reset session before visiting url, unless start_session prepared one.
      Capybara.reset_sessions! unless @session_started
      @session_started = false

      # Open page
      visit url

      # Optional pause after the visit, before taking the screenshot
      sleep opts[:timeout] if opts[:timeout]

      # Check response code
      status_code = page.driver.status_code.to_i
      unless valid_status_code?(status_code, allowed_status_codes)
        raise WebshotError, "Could not fetch page: #{url.inspect}, error code: #{page.driver.status_code}"
      end

      # The driver writes to a closed temp file that is re-opened by MiniMagick.
      tmp = Tempfile.new(["webshot", ".png"])
      tmp.close
      begin
        screenshot_opts = { full: full }
        screenshot_opts[:selector] = selector if selector

        # Save screenshot to file
        page.driver.save_screenshot(tmp.path, screenshot_opts)

        # Resize screenshot
        thumb = MiniMagick::Image.open(tmp.path)
        if block_given?
          # Customize MiniMagick options
          yield thumb
        else
          thumb.combine_options do |c|
            c.thumbnail "#{width}x"
            c.background "white"
            # ImageMagick settings only affect the operators that follow
            # them, so gravity has to be set before extent to take effect.
            c.gravity gravity
            c.extent "#{width}x#{height}"
            c.quality quality
          end
        end

        # Save thumbnail
        thumb.write path
        thumb
      ensure
        tmp.unlink
      end
    rescue Capybara::Poltergeist::StatusFailError,
           Capybara::Poltergeist::BrowserError,
           Capybara::Poltergeist::DeadClient,
           Capybara::Poltergeist::TimeoutError,
           Errno::EPIPE,
           Errno::ECONNRESET => e
      # Connection-level failures are reported as WebshotError as well.
      raise WebshotError, "Capybara error: #{e.message.inspect}"
    end
  end
end