├── .document ├── .gitignore ├── .rspec ├── CHANGELOG.txt ├── Gemfile ├── Gemfile.lock ├── LICENSE.txt ├── README.md ├── Rakefile ├── VERSION ├── lib ├── open_graph.rb ├── opengraph_parser.rb └── redirect_follower.rb ├── opengraph_parser.gemspec └── spec ├── lib ├── open_graph_spec.rb └── redirect_follower_spec.rb ├── spec_helper.rb └── view ├── opengraph.html ├── opengraph_no_meta_nor_description.html └── opengraph_no_metadata.html /.document: -------------------------------------------------------------------------------- 1 | lib/**/*.rb 2 | bin/* 3 | - 4 | features/**/*.feature 5 | LICENSE.txt 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # rcov generated 2 | coverage 3 | coverage.data 4 | 5 | # rdoc generated 6 | rdoc 7 | 8 | # yard generated 9 | doc 10 | .yardoc 11 | 12 | # bundler 13 | .bundle 14 | 15 | # jeweler generated 16 | pkg 17 | 18 | # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore: 19 | # 20 | # * Create a file at ~/.gitignore 21 | # * Include files you want ignored 22 | # * Run: git config --global core.excludesfile ~/.gitignore 23 | # 24 | # After doing this, these files will be ignored in all your git projects, 25 | # saving you from having to 'pollute' every project you touch with them 26 | # 27 | # Not sure what to needs to be ignored for particular editors/OSes? Here's some ideas to get you started. 
(Remember, remove the leading # of the line) 28 | # 29 | # For MacOS: 30 | # 31 | #.DS_Store 32 | 33 | # For TextMate 34 | #*.tmproj 35 | #tmtags 36 | 37 | # For emacs: 38 | #*~ 39 | #\#* 40 | #.\#* 41 | 42 | # For vim: 43 | #*.swp 44 | 45 | # For redcar: 46 | #.redcar 47 | 48 | # For rubinius: 49 | #*.rbc 50 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | -------------------------------------------------------------------------------- /CHANGELOG.txt: -------------------------------------------------------------------------------- 1 | Version 0.2.4 2 | - Ruby 3 support 3 | - Support redirect_limit as parameter 4 | - Fix minor bugs 5 | 6 | Version 0.2.3 7 | - Support passing html body as parameter 8 | 9 | Version 0.2.2 10 | - Fix major bug on 0.2.1 causing normal http request not working. 11 | 12 | Version 0.2.1 13 | - Allow user to add custom headers when requesting URL 14 | 15 | Version 0.2.0 16 | - Support HTTPS 17 | - Add original_images attribute 18 | - Refactor method converting relative url to absolute url 19 | - Generic structure for metadata 20 | 21 | Version 0.1.3 22 | - Fix Gemfile 23 | 24 | Version 0.1.2 25 | - Modify xpath for fallback searching in tag to work in un-wellformed xml format. 26 | 27 | Version 0.1.1 28 | - Use addressable gem to handle better url 29 | - Check image urls to handle both relative and absolute urls. 30 | 31 | Version 0.1.0 32 | - Implement OpenGraph library 33 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "http://rubygems.org" 2 | # Add dependencies required to use your gem here. 3 | # Example: 4 | # gem "activesupport", ">= 2.3.5" 5 | 6 | # Add dependencies to develop your gem here. 7 | # Include everything needed to run rake, tests, features, etc. 
8 | group :development do 9 | gem "rspec" 10 | gem "rdoc" 11 | gem "bundler" 12 | end 13 | 14 | gem "nokogiri" 15 | gem "addressable" 16 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | addressable (2.8.0) 5 | public_suffix (>= 2.0.2, < 5.0) 6 | diff-lcs (1.4.4) 7 | mini_portile2 (2.6.1) 8 | nokogiri (1.12.5) 9 | mini_portile2 (~> 2.6.1) 10 | racc (~> 1.4) 11 | public_suffix (4.0.6) 12 | racc (1.6.0) 13 | rdoc (6.3.3) 14 | rspec (3.10.0) 15 | rspec-core (~> 3.10.0) 16 | rspec-expectations (~> 3.10.0) 17 | rspec-mocks (~> 3.10.0) 18 | rspec-core (3.10.1) 19 | rspec-support (~> 3.10.0) 20 | rspec-expectations (3.10.1) 21 | diff-lcs (>= 1.2.0, < 2.0) 22 | rspec-support (~> 3.10.0) 23 | rspec-mocks (3.10.2) 24 | diff-lcs (>= 1.2.0, < 2.0) 25 | rspec-support (~> 3.10.0) 26 | rspec-support (3.10.3) 27 | 28 | PLATFORMS 29 | ruby 30 | 31 | DEPENDENCIES 32 | addressable 33 | bundler 34 | nokogiri 35 | rdoc 36 | rspec 37 | 38 | BUNDLED WITH 39 | 2.2.25 40 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Huy Ha 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpengraphParser 2 | 3 | OpengraphParser is a simple Ruby library for parsing Open Graph protocol information from a website. Learn more about the protocol at: 4 | http://ogp.me 5 | 6 | ## Installation 7 | 8 | ```bash 9 | gem install opengraph_parser 10 | ``` 11 | 12 | or add to Gemfile 13 | 14 | ```bash 15 | gem "opengraph_parser" 16 | ``` 17 | 18 | ## Usage 19 | 20 | ### Parsing an URL 21 | 22 | ```ruby 23 | og = OpenGraph.new("http://ogp.me") 24 | og.title # => "Open Graph protocol" 25 | og.type # => "website" 26 | og.url # => "http://ogp.me/" 27 | og.description # => "The Open Graph protocol enables any web page to become a rich object in a social graph." 
28 | og.images # => ["http://ogp.me/logo.png"] 29 | ``` 30 | 31 | You can also get other Open Graph metadata as: 32 | 33 | ```ruby 34 | og.metadata # => {"og:image:type"=>"image/png", "og:image:width"=>"300", "og:image:height"=>"300"} 35 | ``` 36 | 37 | ### Parsing a HTML document 38 | 39 | ```ruby 40 | og = OpenGraph.new(html_string) 41 | ``` 42 | 43 | ### Custom header fields 44 | In some cases you may need to change fields in the HTTP request header for a URL: 45 | ```ruby 46 | og = OpenGraph.new("http://ogp.me", { :headers => {'User-Agent' => 'Custom User Agent'} }) 47 | ``` 48 | 49 | ### Fallback 50 | If you try to parse Open Graph information for a website that doesn’t have any Open Graph metadata, the library will try to find other information in the website according to the following rules: 51 | 52 | * `<title>` for title 53 | * `<meta name="description">` for description 54 | * `<link rel="image_src">` or all `<img>` tags for images 55 | 56 | You can disable this fallback lookup by passing false to init method: 57 | 58 | ```ruby 59 | og = OpenGraph.new("http://ogp.me", false) 60 | ``` 61 | 62 | ## Contributing to opengraph_parser 63 | 64 | * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet. 65 | * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it. 66 | * Fork the project. 67 | * Start a feature/bugfix branch. 68 | * Commit and push until you are happy with your contribution. 69 | * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally. 70 | * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it. 71 | 72 | ## Copyright 73 | 74 | Copyright (c) 2013 Huy Ha. See LICENSE.txt for further details. 
75 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require 'rubygems' 4 | require 'bundler' 5 | begin 6 | Bundler.setup(:default, :development) 7 | rescue Bundler::BundlerError => e 8 | $stderr.puts e.message 9 | $stderr.puts "Run `bundle install` to install missing gems" 10 | exit e.status_code 11 | end 12 | require 'rake' 13 | 14 | require 'jeweler' 15 | Jeweler::Tasks.new do |gem| 16 | # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options 17 | gem.name = "opengraph_parser" 18 | gem.homepage = "http://github.com/huyha85/opengraph_parser" 19 | gem.license = "MIT" 20 | gem.summary = %Q{A simple Ruby library for parsing Open Graph Protocol information from a website.} 21 | gem.description = %Q{A simple Ruby library for parsing Open Graph Protocol information from a website. It also includes a fallback solution when the website has no Open Graph information.} 22 | gem.email = "hhuy424@gmail.com" 23 | gem.authors = ["Huy Ha", "Duc Trinh"] 24 | # dependencies defined in Gemfile 25 | gem.files = Dir.glob('lib/**/*.rb') 26 | end 27 | Jeweler::RubygemsDotOrgTasks.new 28 | 29 | require 'rspec/core' 30 | require 'rspec/core/rake_task' 31 | RSpec::Core::RakeTask.new(:spec) do |spec| 32 | spec.pattern = FileList['spec/**/*_spec.rb'] 33 | end 34 | 35 | RSpec::Core::RakeTask.new(:rcov) do |spec| 36 | spec.pattern = 'spec/**/*_spec.rb' 37 | spec.rcov = true 38 | end 39 | 40 | task :default => :spec 41 | 42 | require 'rdoc/task' 43 | Rake::RDocTask.new do |rdoc| 44 | version = File.exist?('VERSION') ? 
# Parses Open Graph protocol metadata out of a web page.
#
# The source is either a URL, which is downloaded through RedirectFollower,
# or a raw HTML document, recognised by the presence of a closing "</html>"
# tag. When the page carries no Open Graph tags the parser can optionally
# fall back to <title>, <meta name="description">, <link rel="image_src">
# and <img> elements.
#
# Relies on Nokogiri and Addressable::URI being loaded before use.
class OpenGraph
  # Matches an Open Graph property name and captures everything after "og:".
  OG_PROPERTY_PATTERN = /\Aog:(.+)\z/i.freeze
  # Open Graph properties that are exposed directly as attributes.
  SIMPLE_ATTRIBUTES = %w[title url type description].freeze
  private_constant :OG_PROPERTY_PATTERN, :SIMPLE_ATTRIBUTES

  attr_accessor :src, :url, :type, :title, :description, :images, :metadata, :response, :original_images, :html_content

  # @param src [String] a URL to download, or an HTML document
  # @param fallback [Boolean, Hash] whether to look up non Open Graph data
  #   when a value is missing; a Hash given here is treated as +options+
  #   and the fallback stays enabled
  # @param options [Hash] forwarded to RedirectFollower
  #   (+:headers+, +:redirect_limit+)
  def initialize(src, fallback = true, options = {})
    if fallback.is_a?(Hash)
      options = fallback
      fallback = true
    end
    @src = src
    @body = nil
    @images = []
    @metadata = {}
    parse_opengraph(options)
    load_fallback if fallback
    check_images_path
  ensure
    # The parsed document is only needed while the object is being built.
    @document = nil
  end

  private

  # Reads every <meta property="og:..."> tag into +metadata+, the simple
  # attributes and the image list.
  def parse_opengraph(options = {})
    return unless load_body(options)

    document.css('meta').each do |meta|
      name = og_property_name(meta.attribute('property').to_s)
      next unless name

      content = meta.attribute('content').to_s.strip
      @metadata = add_metadata(@metadata, name, content)
      if SIMPLE_ATTRIBUTES.include?(name)
        instance_variable_set("@#{name}", content) unless content.empty?
      elsif name == 'image'
        add_image(content)
      end
    end
  end

  # Fills @body either from the source itself (HTML input) or by downloading
  # the source URL. Any failure is treated as "no page available": title and
  # url are set to the source and nothing is parsed.
  #
  # @return [Boolean] true when a body is available for parsing
  def load_body(options)
    if @src.include?('</html>')
      @body = @src
      @html_content = true
    else
      @body = RedirectFollower.new(@src, options).resolve.body
      @html_content = false
    end
    !@body.nil?
  rescue StandardError
    @title = @url = @src
    false
  end

  # Parses the body once and reuses the document for both the Open Graph
  # pass and the fallback pass.
  def document
    @document ||= Nokogiri.parse(@body)
  end

  # Returns the property name without its "og:" prefix, or nil when the
  # property is not an Open Graph one. Only the leading prefix is removed,
  # so "og:blog:title" yields "blog:title".
  def og_property_name(property)
    match = OG_PROPERTY_PATTERN.match(property.to_s)
    match && match[1]
  end

  # Fills in values the Open Graph tags did not provide.
  def load_fallback
    return unless @body

    doc = document

    if @title.to_s.empty?
      title_node = doc.xpath('//head//title').first
      @title = title_node.text.to_s.strip if title_node
    end

    # An HTML document passed as the source is not a usable URL, so the
    # source only stands in for the URL when it was actually fetched.
    @url = @src if @url.to_s.empty? && !@html_content

    if @description.to_s.empty?
      description_meta = doc.xpath("//head//meta[@name='description']").first
      @description = description_meta.attribute('content').to_s.strip if description_meta
    end
    @description = fetch_first_text(doc) if @description.to_s.empty?

    fetch_images(doc, "//head//link[@rel='image_src']", 'href') if @images.empty?
    fetch_images(doc, '//img', 'src') if @images.empty?
  end

  # Keeps the untouched image list in +original_images+ and rewrites
  # +images+ so that relative paths become absolute URLs.
  def check_images_path
    @original_images = @images.dup

    base = base_location
    return unless base

    uri = parse_base_uri(base)
    return unless uri

    candidates = @images.dup
    @images = []
    candidates.each { |image| add_image(absolute_image_url(uri, image)) }
  end

  # The location relative image paths are resolved against: the page URL if
  # known, otherwise the source when the source is a URL rather than HTML.
  def base_location
    return @url if @url

    @html_content ? nil : @src
  end

  # @return [Addressable::URI, nil] nil when the base cannot be parsed
  def parse_base_uri(base)
    Addressable::URI.parse(base)
  rescue Addressable::URI::InvalidURIError
    nil
  end

  # Resolves a host-less image path against the base URI. An image URL that
  # cannot be parsed or joined is kept as it is instead of aborting.
  def absolute_image_url(base_uri, image)
    Addressable::URI.parse(image).host.nil? ? base_uri.join(image).to_s : image
  rescue Addressable::URI::InvalidURIError
    image
  end

  # Appends an image URL, skipping blanks and duplicates.
  def add_image(image_url)
    @images << image_url unless @images.include?(image_url) || image_url.to_s.empty?
  end

  # Collects the given attribute of every node matching the XPath as an image.
  def fetch_images(doc, xpath_str, attr)
    doc.xpath(xpath_str).each do |link|
      add_image(link.attribute(attr).to_s.strip)
    end
  end

  # @return [String, nil] the stripped text of the first <p> longer than 20
  #   characters, or nil when there is none
  def fetch_first_text(doc)
    doc.xpath('//p').each do |paragraph|
      text = paragraph.text.to_s.strip
      return text if text.length > 20
    end
    nil
  end

  # Stores +content+ under a colon separated +path+ ("image:width") in a
  # nested structure of the form { image: [{ _value: "...", width: [...] }] }.
  # A nested property is attached to the most recent entry of its parent.
  #
  # @return [Hash] the container that was passed in, updated in place
  def add_metadata(metadata_container, path, content)
    segments = path.split(':')
    if segments.size > 1
      key = segments.shift.to_sym
      remainder = segments.join(':')
      if metadata_container[key]
        siblings = metadata_container[key]
        siblings[-1] = add_metadata(siblings.last, remainder, content)
      else
        metadata_container[key] = [add_metadata({}, remainder, content)]
      end
    else
      (metadata_container[path.to_sym] ||= []) << { _value: content }
    end
    metadata_container
  end
end

# Downloads a URL over HTTP or HTTPS, following redirects up to a limit.
#
# Relies on Addressable::URI and Net::HTTP being loaded before use.
class RedirectFollower
  REDIRECT_DEFAULT_LIMIT = 5
  class TooManyRedirects < StandardError; end

  attr_accessor :url, :body, :redirect_limit, :response, :headers

  # @param url [String] the address to download
  # @param options [Hash] +:redirect_limit+ (defaults to
  #   REDIRECT_DEFAULT_LIMIT) and +:headers+ (request headers, defaults to {})
  def initialize(url, options = {})
    @url = url
    @redirect_limit = options[:redirect_limit] || REDIRECT_DEFAULT_LIMIT
    @headers = options[:headers] || {}
  end

  # Requests +url+ and keeps following redirects until a non-redirect
  # response arrives. +url+, +redirect_limit+, +response+ and +body+ are
  # updated along the way.
  #
  # @return [RedirectFollower] self
  # @raise [TooManyRedirects] when the redirect limit is used up
  def resolve
    loop do
      raise TooManyRedirects if redirect_limit < 0

      uri = Addressable::URI.parse(url)
      self.response = fetch(uri)
      break unless response.kind_of?(Net::HTTPRedirection)

      target = redirect_url
      # A redirect that names no target cannot be followed; the response is
      # used as it is.
      break if target.nil?

      # Location may be relative, so it is resolved against the current URL.
      self.url = uri.join(target).to_s
      self.redirect_limit -= 1
    end

    self.body = response.body
    self
  end

  # @return [String, nil] the Location header of the current response, or
  #   the first link found in its body when the header is missing; nil when
  #   neither is present
  def redirect_url
    location = response['location']
    return location unless location.nil?

    response.body.to_s[/<a href=\"([^>]+)\">/i, 1]
  end

  private

  # Performs a single GET request, with certificate verification for HTTPS.
  def fetch(uri)
    http = Net::HTTP.new(uri.host, uri.inferred_port)
    if uri.scheme == 'https'
      http.use_ssl = true
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    end
    http.request_get(uri.request_uri, @headers)
  end
end
14 | s.email = "hhuy424@gmail.com" 15 | s.extra_rdoc_files = [ 16 | "LICENSE.txt", 17 | "README.md" 18 | ] 19 | s.files = [ 20 | "lib/open_graph.rb", 21 | "lib/opengraph_parser.rb", 22 | "lib/redirect_follower.rb" 23 | ] 24 | s.homepage = "http://github.com/huyha85/opengraph_parser" 25 | s.licenses = ["MIT"] 26 | s.require_paths = ["lib"] 27 | s.rubygems_version = "1.8.10" 28 | s.summary = "A simple Ruby library for parsing Open Graph Protocol information from a website." 29 | 30 | if s.respond_to? :specification_version then 31 | s.specification_version = 3 32 | 33 | if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then 34 | s.add_runtime_dependency(%q<nokogiri>, [">= 0"]) 35 | s.add_runtime_dependency(%q<addressable>, [">= 0"]) 36 | s.add_development_dependency(%q<rspec>, [">= 0"]) 37 | s.add_development_dependency(%q<rdoc>, [">= 0"]) 38 | s.add_development_dependency(%q<bundler>, [">= 0"]) 39 | s.add_development_dependency(%q<jeweler>, [">= 0"]) 40 | else 41 | s.add_dependency(%q<nokogiri>, [">= 0"]) 42 | s.add_dependency(%q<addressable>, [">= 0"]) 43 | s.add_dependency(%q<rspec>, [">= 0"]) 44 | s.add_dependency(%q<rdoc>, [">= 0"]) 45 | s.add_dependency(%q<bundler>, [">= 0"]) 46 | s.add_dependency(%q<jeweler>, [">= 0"]) 47 | end 48 | else 49 | s.add_dependency(%q<nokogiri>, [">= 0"]) 50 | s.add_dependency(%q<addressable>, [">= 0"]) 51 | s.add_dependency(%q<rspec>, [">= 0"]) 52 | s.add_dependency(%q<rdoc>, [">= 0"]) 53 | s.add_dependency(%q<bundler>, [">= 0"]) 54 | s.add_dependency(%q<jeweler>, [">= 0"]) 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /spec/lib/open_graph_spec.rb: -------------------------------------------------------------------------------- 1 | require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') 2 | 3 | describe OpenGraph do 4 | describe "#initialize" do 5 | context "with invalid src" do 6 | it "should set title and url the same as src" do 7 | og = 
OpenGraph.new("invalid") 8 | og.src.should == "invalid" 9 | og.title.should == "invalid" 10 | og.url.should == "invalid" 11 | end 12 | end 13 | 14 | context "with no fallback" do 15 | it "should get values from opengraph metadata" do 16 | response = double(body: File.open("#{File.dirname(__FILE__)}/../view/opengraph.html", 'r') { |f| f.read }) 17 | RedirectFollower.stub(:new) { double(resolve: response) } 18 | 19 | og = OpenGraph.new("http://test.host", false) 20 | og.src.should == "http://test.host" 21 | og.title.should == "OpenGraph Title" 22 | og.type.should == "article" 23 | og.url.should == "http://test.host" 24 | og.description.should == "My OpenGraph sample site for Rspec" 25 | og.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"] 26 | og.original_images.should == ["http://test.host/images/rock1.jpg", "/images/rock2.jpg"] 27 | og.metadata.should == { 28 | title: [{_value: "OpenGraph Title"}], 29 | type: [{_value: "article"}], 30 | url: [{_value: "http://test.host"}], 31 | description: [{_value: "My OpenGraph sample site for Rspec"}], 32 | image: [ 33 | { 34 | _value: "http://test.host/images/rock1.jpg", 35 | width: [{ _value: "300" }], 36 | height: [{ _value: "300" }] 37 | }, 38 | { 39 | _value: "/images/rock2.jpg", 40 | height: [{ _value: "1000" }] 41 | } 42 | ], 43 | locale: [ 44 | { 45 | _value: "en_GB", 46 | alternate: [ 47 | { _value: "fr_FR" }, 48 | { _value: "es_ES" } 49 | ] 50 | } 51 | ] 52 | } 53 | end 54 | end 55 | 56 | context "with fallback" do 57 | context "when website has opengraph metadata" do 58 | it "should get values from opengraph metadata" do 59 | response = double(body: File.open("#{File.dirname(__FILE__)}/../view/opengraph.html", 'r') { |f| f.read }) 60 | RedirectFollower.stub(:new) { double(resolve: response) } 61 | 62 | og = OpenGraph.new("http://test.host") 63 | og.src.should == "http://test.host" 64 | og.title.should == "OpenGraph Title" 65 | og.type.should == "article" 66 | og.url.should == 
"http://test.host" 67 | og.description.should == "My OpenGraph sample site for Rspec" 68 | og.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"] 69 | end 70 | end 71 | 72 | context "when website has no opengraph metadata" do 73 | it "should lookup for other data from website" do 74 | response = double(body: File.open("#{File.dirname(__FILE__)}/../view/opengraph_no_metadata.html", 'r') { |f| f.read }) 75 | RedirectFollower.stub(:new) { double(resolve: response) } 76 | 77 | og = OpenGraph.new("http://test.host/child_page") 78 | og.src.should == "http://test.host/child_page" 79 | og.title.should == "OpenGraph Title Fallback" 80 | og.type.should be_nil 81 | og.url.should == "http://test.host/child_page" 82 | og.description.should == "Short Description Fallback" 83 | og.images.should == ["http://test.host/images/wall1.jpg", "http://test.host/images/wall2.jpg"] 84 | end 85 | end 86 | 87 | context "when website has no opengraph metadata nor description" do 88 | it "should lookup for other data from website" do 89 | response = double(body: File.open("#{File.dirname(__FILE__)}/../view/opengraph_no_meta_nor_description.html", 'r') { |f| f.read }) 90 | RedirectFollower.stub(:new) { double(resolve: response) } 91 | 92 | og = OpenGraph.new("http://test.host/child_page") 93 | og.src.should == "http://test.host/child_page" 94 | og.title.should == "OpenGraph Title Fallback" 95 | og.type.should be_nil 96 | og.url.should == "http://test.host/child_page" 97 | og.description.should == "No description meta here." 
98 | og.images.should == ["http://test.host/images/wall1.jpg", "http://test.host/images/wall2.jpg"] 99 | end 100 | end 101 | end 102 | 103 | context "with body" do 104 | it "should parse body instead of downloading it" do 105 | content = File.read("#{File.dirname(__FILE__)}/../view/opengraph.html") 106 | RedirectFollower.should_not_receive(:new) 107 | 108 | og = OpenGraph.new(content) 109 | og.src.should == content 110 | og.title.should == "OpenGraph Title" 111 | og.type.should == "article" 112 | og.url.should == "http://test.host" 113 | og.description.should == "My OpenGraph sample site for Rspec" 114 | og.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"] 115 | end 116 | end 117 | end 118 | end 119 | -------------------------------------------------------------------------------- /spec/lib/redirect_follower_spec.rb: -------------------------------------------------------------------------------- 1 | require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') 2 | 3 | describe RedirectFollower do 4 | describe "#resolve" do 5 | let(:url) { "http://test.host" } 6 | let(:https_url) { "https://test.host" } 7 | let(:mock_res) { double(body: "Body is here.") } 8 | let(:mock_redirect) { 9 | m = double(body: %Q{<body><a href="http://new.test.host"></a></body>}, kind_of?: Net::HTTPRedirection) 10 | m.stub(:[]).and_return(nil) 11 | m 12 | } 13 | 14 | context "with no redirection" do 15 | it "should return the response" do 16 | uri = Addressable::URI.parse(url) 17 | 18 | http = Net::HTTP.new(uri.host, uri.inferred_port) 19 | Net::HTTP.should_receive(:new).with(uri.host, uri.inferred_port).and_return(http) 20 | http.should_receive(:request_get).and_return(mock_res) 21 | 22 | res = RedirectFollower.new(url).resolve 23 | res.body.should == "Body is here." 
24 | res.redirect_limit.should == RedirectFollower::REDIRECT_DEFAULT_LIMIT 25 | end 26 | 27 | describe "and uri scheme is HTTPS" do 28 | it "should use https method to retrieve the uri" do 29 | uri = Addressable::URI.parse(url) 30 | 31 | https = Net::HTTP.new(uri.host, uri.inferred_port) 32 | Net::HTTP.should_receive(:new).with(uri.host, uri.inferred_port).and_return(https) 33 | https.should_receive(:request_get).and_return(mock_res) 34 | 35 | res = RedirectFollower.new(https_url).resolve 36 | res.body.should == "Body is here." 37 | res.redirect_limit.should == RedirectFollower::REDIRECT_DEFAULT_LIMIT 38 | end 39 | end 40 | 41 | describe "and has headers option" do 42 | it "should add headers when retrieve the uri" do 43 | uri = Addressable::URI.parse(url) 44 | 45 | http = Net::HTTP.new(uri.host, uri.inferred_port) 46 | Net::HTTP.should_receive(:new).with(uri.host, uri.inferred_port).and_return(http) 47 | http.should_receive(:request_get).and_return(mock_res) 48 | res = RedirectFollower.new(url, {:headers => {'User-Agent' => 'My Custom User-Agent'}}).resolve 49 | res.body.should == "Body is here." 50 | res.redirect_limit.should == RedirectFollower::REDIRECT_DEFAULT_LIMIT 51 | end 52 | end 53 | end 54 | 55 | context "with redirection" do 56 | it "should follow the link in redirection" do 57 | uri = Addressable::URI.parse(url) 58 | 59 | http = Net::HTTP.new(uri.host, uri.inferred_port) 60 | Net::HTTP.should_receive(:new).twice.and_return(http) 61 | http.should_receive(:request_get).twice.and_return(mock_redirect, mock_res) 62 | 63 | res = RedirectFollower.new(url).resolve 64 | res.body.should == "Body is here." 
65 | res.redirect_limit.should == RedirectFollower::REDIRECT_DEFAULT_LIMIT - 1 66 | end 67 | end 68 | 69 | context "with unlimited redirection" do 70 | it "should raise TooManyRedirects error" do 71 | uri = Addressable::URI.parse(url) 72 | 73 | http = Net::HTTP.new(uri.host, uri.inferred_port) 74 | Net::HTTP.stub(:new).and_return(http) 75 | http.stub(:request_get).and_return(mock_redirect) 76 | 77 | lambda { 78 | RedirectFollower.new(url).resolve 79 | }.should raise_error(RedirectFollower::TooManyRedirects) 80 | end 81 | end 82 | end 83 | end 84 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 2 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 3 | require 'rspec' 4 | require 'open_graph' 5 | require 'redirect_follower' 6 | 7 | # Requires supporting files with custom matchers and macros, etc, 8 | # in ./support/ and its subdirectories. 
9 | Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f} 10 | 11 | RSpec.configure do |config| 12 | 13 | end 14 | -------------------------------------------------------------------------------- /spec/view/opengraph.html: -------------------------------------------------------------------------------- 1 | <!-- 2 | =================================================================== 3 | =============================== https://github.com/huyha85 === 4 | =================================================================== 5 | --> 6 | 7 | <html> 8 | <head> 9 | <title>OpenGraph Title Fallback 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /spec/view/opengraph_no_meta_nor_description.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | OpenGraph Title Fallback 4 | 5 | 6 | 7 | 8 | 9 | 10 |

No description meta here.

11 | 12 | 13 | -------------------------------------------------------------------------------- /spec/view/opengraph_no_metadata.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | OpenGraph Title Fallback 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | --------------------------------------------------------------------------------