├── .gitignore
├── .ruby-gemset
├── .ruby-version
├── .travis.yml
├── CHANGELOG.md
├── Gemfile
├── LICENSE.md
├── PageRankr.gemspec
├── README.md
├── Rakefile
├── lib
├── PageRankr.rb
├── page_rankr.rb
└── page_rankr
│ ├── backlink.rb
│ ├── backlinks.rb
│ ├── backlinks
│ ├── bing.rb
│ ├── google.rb
│ └── yahoo.rb
│ ├── index.rb
│ ├── indexes.rb
│ ├── indexes
│ ├── bing.rb
│ ├── google.rb
│ └── yahoo.rb
│ ├── proxy_services.rb
│ ├── proxy_services
│ ├── random.rb
│ └── round_robin.rb
│ ├── rank.rb
│ ├── ranks.rb
│ ├── ranks
│ ├── alexa_country.rb
│ ├── alexa_global.rb
│ ├── alexa_us.rb
│ ├── domain_authority.rb
│ ├── google.rb
│ ├── google
│ │ └── checksum.rb
│ ├── moz_rank.rb
│ └── page_authority.rb
│ ├── request.rb
│ ├── site.rb
│ ├── social.rb
│ ├── socials.rb
│ ├── socials
│ ├── facebook.rb
│ ├── google.rb
│ ├── linkedin.rb
│ ├── pinterest.rb
│ ├── stumble_upon.rb
│ ├── twitter.rb
│ └── vk.rb
│ ├── tracker.rb
│ ├── trackers.rb
│ └── version.rb
├── out.html
└── spec
├── backlinks
├── bing_spec.rb
├── google_spec.rb
└── yahoo_spec.rb
├── fixtures
└── vcr_cassettes
│ ├── PageRankr_Backlinks_Bing
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Backlinks_Google
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Backlinks_Yahoo
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Indexes_Bing
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Indexes_Google
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Indexes_Yahoo
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Ranks_AlexaCountry
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Ranks_AlexaGlobal
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Ranks_AlexaUs
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Ranks_DomainAuthority
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Ranks_Google
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Ranks_MozRank
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Ranks_PageAuthority
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Socials_Facebook
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Socials_Google
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Socials_LinkedIn
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Socials_Pinterest
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Socials_StumbleUpon
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Socials_Twitter
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── PageRankr_Socials_Vk
│ └── _run
│ │ ├── with_match
│ │ └── .yml
│ │ └── with_no_match
│ │ └── .yml
│ ├── alexa_ranks_edge_case_1.yml
│ ├── failure_socials.yml
│ └── success_socials.yml
├── indexes
├── bing_spec.rb
├── google_spec.rb
└── yahoo_spec.rb
├── page_rankr_spec.rb
├── proxy_services
├── random_spec.rb
└── round_robin_spec.rb
├── ranks
├── alexa_country_spec.rb
├── alexa_global_spec.rb
├── alexa_us_spec.rb
├── domain_authority_spec.rb
├── google
│ └── checksum_spec.rb
├── google_spec.rb
├── moz_rank_spec.rb
└── page_authority_spec.rb
├── site_spec.rb
├── socials
├── facebook_spec.rb
├── google_spec.rb
├── linkedin_spec.rb
├── pinterest_spec.rb
├── stumble_upon_spec.rb
├── twitter_spec.rb
└── vk_spec.rb
├── spec_helper.rb
└── support
└── custom_matchers.rb
/.gitignore:
--------------------------------------------------------------------------------
1 | ## MAC OS
2 | .DS_Store
3 |
4 | ## TEXTMATE
5 | *.tmproj
6 | tmtags
7 |
8 | ## EMACS
9 | *~
10 | \#*
11 | .\#*
12 |
13 | ## VIM
14 | *.swp
15 |
16 | ## PROJECT::GENERAL
17 | coverage
18 | rdoc
19 | pkg
20 |
21 | ## PROJECT::SPECIFIC
22 | .idea
23 | doc
24 | .yardoc
25 | .bundle
26 | Gemfile.lock
--------------------------------------------------------------------------------
/.ruby-gemset:
--------------------------------------------------------------------------------
1 | page_rankr
2 |
--------------------------------------------------------------------------------
/.ruby-version:
--------------------------------------------------------------------------------
1 | 2.2.3
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | rvm:
2 | - 2.0.0
3 | - 2.1.5
4 | - 2.2.0
5 | sudo: false
6 | cache: bundler
7 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | ## Version 4.6.1
4 | * Loosens version requirement for public_suffix (Hopefully a version 2 never comes out, so I don't have to hear about it again.)
5 |
6 | ## Version 4.6.0
7 | * Bumps public_suffix
8 | * Support IDN addresses
9 | * Change alexa to support subdomains
10 | * Restructure tests
11 |
12 | ## Version 4.5.0
13 | * Bumps to public_suffix Gem to 1.5.1
14 | * Deprecates support for Ruby < 2.0
15 |
16 | ## Version 4.4.1
17 | * Fixes Alexa US to use correct rank
18 |
19 | ## Version 4.4.0
20 | * Adds social signals for google, linked_in, pinterest, stumble_upon, twitter, and vk
21 |
22 | ## Version 4.3.0
23 | * Add Page Authority and Domain Authority ranks
24 |
25 | ## Version 4.2.1
26 | * Fix Yahoo trackers xpath
27 | * Require newer version of public_suffix_service
28 |
29 | ## Version 4.2.0
30 | * Added Moz Rank and Page Authority metrics.
31 |
32 | ## Version 4.1.1
33 | * Fix issue where Google Rank tracker did not pass options to request.
34 |
35 | ## Version 4.1.0
36 | * Add Alexa Country rank.
37 |
38 | ## Version 4.0.0
39 | * Fix for Google pagerank check. Query parts were omitted which made some pages get the same PR as the domain.
40 | * Switch from Typhoeus to HTTParty to avoid memory leaks and have better support on Windows.
41 |
42 | ## Version 3.2.1
43 | * Fix issue where tracker calls proxy with class name rather than name defined on class instance.
44 |
45 | ## Version 3.2.0
46 | * Added proxy service
47 | * Removed Compete rank tracker, because there is no way to get it without a key now
48 | * Fixed google backlinks and indexes
49 |
50 | ## Version 3.1.2
51 | * Fix issue with URI parse in 1.9.3
52 |
53 | ## Version 3.1.1
54 | * Fix google backlinks and indexes
55 |
56 | ## Version 3.1.0
57 | * Add yahoo index back
58 | * Fix yahoo and google backlinks and indexes
59 | * Improve PageRankr::Site to support different levels of specificity
60 |
61 | ## Version 3.0.2
62 | * Update gem dependencies
63 |
64 | ## Version 3.0.1
65 | * Improve PageRankr::Site
66 |
67 | ## Version 3.0.0
68 | * Refactor
69 | * Move the logic for the typhoeus request out of the individual trackers in Tracker to hide the complexity.
70 | * Create Tracker to encapsulate the common logic in Backlink, Tracker, and Rank.
71 | * Have each file declare its dependencies, so that it is simple to use a la carte.
72 | * Fix google backlink and index. The search API is deprecated and the new API is annoying to setup, so webscraping has been brought back.
73 | * Make requires consistent.
74 | * API Breakages
75 | * Tracker was renamed to Trackers and a new Tracker file was created that represents a different object.
76 |
77 | ## Version 2.0.4
78 | * Fix google page rank url
79 |
80 | ## Version 2.0.3
81 | * Fix Compete scraper
82 |
83 | ## Version 2.0.2
84 | * Update public_suffix_service gem
85 |
86 | ## Version 2.0.1
87 | * Alexa sometimes returns result for the incorrect site. In this case, the results returned are ignored.
88 |
89 | ## Version 2.0.0
90 | * URL validation
91 | * Parallel requests = way faster!
92 | * Not tracked returns nil
93 | * Alexa US and Global are treated as separate trackers and returned results are a single level hash.
94 | * Removed Altavista and AllTheWeb because they now direct to yahoo.
95 | * Changed some classes to modules so that it wasn't necessary to specify them when opening the class.
96 |
97 | ## Version 1.7.1
98 | * Catches exception thrown when doing compete rank lookup with url not in the form "google.com".
99 |
100 | ## Version 1.7.0
101 | * Merged in additions from iteration labs to add compete rank tracker and domain indexes.
102 |
103 | ## Version 1.6.0
104 |
105 | * Added ability to get global alexa rank instead of just us alexa rank.
106 |
107 | ## Version 1.5.1
108 |
109 | * Added json gem requirement for rubies < 1.9
110 |
111 | ## Version 1.5.0
112 |
113 | * Use googles api to retrieve backlinks.
114 | * Changed workflow for building gems.
115 |
116 | ## Version 1.4.3
117 |
118 | * Fixed google backlink lookup where odd invalid urls were getting high results. For example, "gaybuttfuckers". Thanks to Zach Elko.
119 |
120 | ## Version 1.4.2
121 |
122 | * Fixed bug where sites not tracked by google were returning nil instead of -1.
123 |
124 | ## Version 1.4.1
125 |
126 | * Broken
127 |
128 | ## Version 1.4.0
129 |
130 | * Made it easier to get at the list of supported trackers.
131 | * Refactoring
132 |
133 | ## Version 1.3.0
134 |
135 | * Lots of refactoring. Should be much easier to extend and temporarily fix if needed.
136 |
137 | ## Version 1.2.0
138 |
139 | * Changed backlinks method with no search engines specified to use all of them
140 | * Changed ranks method with no search engines specified to use all of them
141 | * Added alias rank for ranks
142 | * Added alias backlink for backlinks
143 |
144 | ## Version 1.1.0
145 |
146 | * Fixed google xpath for backlinks
147 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | # Dependencies are declared once, in the gemspec, and pulled in from there.
3 | gemspec
4 | # Debugging aid that is only needed while working on the gem itself.
5 | group :development do
6 | gem 'pry-byebug'
7 | end
8 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2009 blatyo
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/PageRankr.gemspec:
--------------------------------------------------------------------------------
1 | require File.expand_path("../lib/page_rankr/version", __FILE__) # loaded for PageRankr::VERSION, used as s.version below
2 | # Gem specification for PageRankr: metadata, dependencies and packaged files.
3 | Gem::Specification.new do |s|
4 | s.name = "PageRankr"
5 | s.version = PageRankr::VERSION
6 | s.platform = Gem::Platform::RUBY
7 | s.authors = ["Allen Madsen"]
8 | s.email = ["blatyo@gmail.com"]
9 | s.license = 'MIT'
10 | s.homepage = "http://github.com/blatyo/page_rankr"
11 | s.summary = "Easy way to retrieve Google Page Rank, Alexa Rank, backlink counts, index counts and different types of social signals"
12 | s.description = "Easy way to retrieve Google Page Rank, Alexa Rank, backlink counts, index counts and different types of social signals"
13 |
14 | s.required_rubygems_version = ">= 1.3.6"
15 | s.add_development_dependency "rake"
16 | s.add_development_dependency "rspec", ">= 2.6.0"
17 | s.add_development_dependency "bundler", ">= 1.0.0"
18 | s.add_development_dependency "fuubar", ">= 0.0.1"
19 | s.add_development_dependency "vcr", ">= 2.9.3"
20 | s.add_development_dependency "webmock"
21 | # Libraries needed at runtime by the gem.
22 | s.add_runtime_dependency "nokogiri", ">= 1.4.1"
23 | s.add_runtime_dependency "json", ">= 1.4.6"
24 | s.add_runtime_dependency "public_suffix", "~> 1.0"
25 | s.add_runtime_dependency "httparty", ">= 0.9.0"
26 | s.add_runtime_dependency "jsonpath", ">= 0.4.2"
27 | s.add_runtime_dependency "addressable"
28 | # File lists come from `git ls-files`, so the gem has to be built from a git checkout.
29 | s.files = `git ls-files`.split("\n")
30 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
31 | s.executables = `git ls-files`.split("\n").map{|f| f =~ /^bin\/(.*)/ ? $1 : nil}.compact # tracked files under bin/, with the "bin/" prefix removed
32 | s.require_paths = ["lib"]
33 | end
34 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PageRankr [Build Status](http://travis-ci.org/blatyo/page_rankr)
2 |
3 | Provides an easy way to retrieve Google Page Rank, Alexa Rank, backlink counts, index counts and different types of social signals.
4 |
5 | __This project is abandoned. If you'd like to take ownership of this project, let me know.__
6 |
7 | _Note: Version ~> 2.0 and ~> 3.0 used typhoeus internally which caused memory leak issues and failures on Windows. 4.0.0 changes the implementation to use a Net::HTTP based library for better compatibility._
8 |
9 | _Note: Version >= 4.1.0 no longer actively maintains compatibility with Ruby 1.8.X. It will probably still work for the time being._
10 |
11 | _Note: Version >= 4.2.0 no longer actively maintains compatibility with Ruby < 1.9.3. It will probably still work, but you may need to specify older versions for gems this library depends on in your Gemfile._
12 |
13 | _Note: Version >= 4.5.0 no longer actively maintains compatibility with Ruby < 2.0._
14 |
15 | Check out a little [web app][1] I wrote up that uses it or look at the [source][2].
16 |
17 | [1]: http://isitpopular.heroku.com
18 | [2]: https://github.com/blatyo/is_it_popular
19 |
20 | ## Get it!
21 |
22 | ``` bash
23 | gem install PageRankr
24 | ```
25 |
26 | ## Gemfile
27 |
28 | ``` ruby
29 | gem 'PageRankr'
30 | ```
31 |
32 | ## Use it!
33 |
34 | ``` ruby
35 | require 'page_rankr'
36 | ```
37 |
38 | ### Backlinks
39 |
40 | Backlinks are the result of doing a search with a query like "link:www.google.com". The number of returned results indicates how many sites point to that url. If a site is not tracked then `nil` is returned.
41 |
42 | ``` ruby
43 | PageRankr.backlinks('www.google.com', :google, :bing) #=> {:google=>161000, :bing=>208000000}
44 | PageRankr.backlinks('www.google.com', :yahoo) #=> {:yahoo=>256300062}
45 | ```
46 |
47 | If you don't specify a search engine, then all of them are used.
48 |
49 | ``` ruby
50 | # this
51 | PageRankr.backlinks('www.google.com')
52 | #=> {:google=>23000, :bing=>215000000, :yahoo=>250522337}
53 |
54 | # is equivalent to
55 | PageRankr.backlinks('www.google.com', :google, :bing, :yahoo)
56 | #=> {:google=>23000, :bing=>215000000, :yahoo=>250522337}
57 | ```
58 |
59 | You can also use the alias `backlink` instead of `backlinks`.
60 |
61 | Valid search engines are: `:google, :bing, :yahoo` (altavista and alltheweb now redirect to yahoo). To get this list you can do:
62 |
63 | ``` ruby
64 | PageRankr.backlink_trackers #=> [:bing, :google, :yahoo]
65 | ```
66 |
67 | ### Indexes
68 |
69 | Indexes are the result of doing a search with a query like "site:www.google.com". The number of returned results indicates how many pages of a domain are indexed by a particular search engine. If the site is not indexed `nil` is returned.
70 |
71 | ``` ruby
72 | PageRankr.indexes('www.google.com', :google) #=> {:google=>4860000}
73 | PageRankr.indexes('www.google.com', :bing) #=> {:bing=>2120000}
74 | ```
75 |
76 | If you don't specify a search engine, then all of them are used.
77 |
78 | ``` ruby
79 | # this
80 | PageRankr.indexes('www.google.com')
81 | #=> {:bing=>2120000, :google=>4860000, :yahoo => 4863000}
82 |
83 | # is equivalent to
84 | PageRankr.indexes('www.google.com', :google, :bing, :yahoo)
85 | #=> {:bing=>2120000, :google=>4860000, :yahoo => 4863000}
86 | ```
87 |
88 | You can also use the alias `index` instead of `indexes`.
89 |
90 | Valid search engines are: `:google, :bing, :yahoo`. To get this list you can do:
91 |
92 | ``` ruby
93 | PageRankr.index_trackers #=> [:bing, :google, :yahoo]
94 | ```
95 |
96 | ### Ranks
97 |
98 | Ranks are ratings assigned to specify how popular a site is. The most famous example of this is the google page rank.
99 |
100 | ``` ruby
101 | PageRankr.ranks('www.google.com', :google) #=> {:google=>10}
102 | ```
103 |
104 | If you don't specify a rank provider, then all of them are used.
105 |
106 | ``` ruby
107 | PageRankr.ranks('www.google.com', :alexa_us, :alexa_global, :google, :moz_rank, :page_authority)
108 | #=> {:alexa_us=>1, :alexa_global=>1, :alexa_country=>1, :google=>10, :moz_rank => 8, :page_authority => 97}
109 |
110 | # this also gives the same result
111 | PageRankr.ranks('www.google.com')
112 | #=> {:alexa_us=>1, :alexa_global=>1, :alexa_country=>1, :google=>9, :moz_rank=>8, :domain_authority=>100, :page_authority=>96}
113 | ```
114 |
115 | You can also use the alias `rank` instead of `ranks`.
116 |
117 | Valid rank trackers are: `:alexa_country, :alexa_global, :alexa_us, :domain_authority, :google, :moz_rank, :page_authority`. To get this list you can do:
118 |
119 | ``` ruby
120 | PageRankr.rank_trackers #=> [:alexa_us, :alexa_global, :alexa_country, :google, :moz_rank, :domain_authority, :page_authority]
121 | ```
122 |
123 | Alexa ranks are descending where 1 is the most popular. Google page ranks are in the range 0-10 where 10 is the most popular. If a site is unindexed then the rank will be nil.
124 |
125 | ### Socials
126 |
127 | Social signals are a somewhat oversimplified way of telling how popular a site or page currently is.
128 |
129 | ``` ruby
130 | PageRankr.socials('www.google.com', :linked_in) #=> {:linked_in=>1001}
131 | ```
132 |
133 | If you don't specify a social tracker, then all of them are used.
134 |
135 | ``` ruby
136 | PageRankr.socials('www.google.com', :google, :linked_in, :pinterest, :stumble_upon, :twitter, :vk)
137 | #=> {:google=>10000, :linked_in=>1001, :pinterest=>75108, :stumble_upon=>255078, :twitter=>21933764, :vk=>3725}
138 |
139 | # this also gives the same result
140 | PageRankr.socials('www.google.com')
141 | #=> {:google=>10000, :linked_in=>1001, :pinterest=>75108, :stumble_upon=>255078, :twitter=>21933764, :vk=>3725}
142 | ```
143 |
144 | Valid social trackers are: `:google, :linked_in, :pinterest, :stumble_upon, :twitter, :vk`. To get this you can do:
145 |
146 | ``` ruby
147 | PageRankr.social_trackers #=> [:google, :linked_in, :pinterest, :stumble_upon, :twitter, :vk]
148 | ```
149 |
150 |
151 | ## Use it a la carte!
152 |
153 | From versions >= 3, everything should be usable in a much more a la carte manner. If all you care about is google page rank (which I speculate is common) you can get that all by itself:
154 |
155 | ``` ruby
156 | require 'page_rankr/ranks/google'
157 |
158 | tracker = PageRankr::Ranks::Google.new("myawesomesite.com")
159 | tracker.run #=> 2
160 | ```
161 |
162 | Also, once a tracker has run three values will be accessible from it:
163 |
164 | ``` ruby
165 | # The value extracted. Tracked is aliased to rank for PageRankr::Ranks, backlink for PageRankr::Backlinks, and index for PageRankr::Indexes.
166 | tracker.tracked #=> 2
167 |
168 | # The value extracted with the jsonpath, xpath, or regex before being cleaned.
169 | tracker.raw #=> "2"
170 |
171 | # The body of the response
171 | tracker.body #=> "<html><head>..."
173 | ```
174 |
175 | ## Rate limiting and proxies
176 |
177 | One of the annoying things about each of these services is that they really don't like you scraping data from them. In order to deal with this issue, they throttle traffic from a single machine. The simplest way to get around this is to use proxy machines to make the requests.
178 |
179 | In PageRankr >= 3.2.0, this is much simpler. The first thing you'll need is a proxy service. Two are provided [here](https://github.com/blatyo/page_rankr/tree/master/lib/page_rankr/proxy_services). A proxy service must define a `proxy` method that takes two arguments. It should return a string like `http://user:password@192.168.1.1:50501`.
180 |
181 | Once you have a proxy service, you can tell PageRankr to use it. For example:
182 |
183 | ``` ruby
184 | PageRankr.proxy_service = PageRankr::ProxyServices::Random.new([
185 | 'http://user:password@192.168.1.1:50501',
186 | 'http://user:password@192.168.1.2:50501'
187 | ])
188 | ```
189 |
190 | Once PageRankr knows about your proxy service, any request that is made will ask for a proxy from the proxy service. It does this by calling the `proxy` method. When it calls the `proxy` method, it passes the name of the tracker (e.g. `:ranks_google`) and the site that is being looked up. Hopefully, this information is sufficient for you to build a much smarter proxy service than the ones provided (pull requests welcome!).
191 |
192 | ## Fix it!
193 |
194 | If you ever find something is broken it should now be much easier to fix it with version >= 1.3.0. For example, if the xpath used to lookup a backlink is broken, just override the method for that class to provide the correct xpath.
195 |
196 | ``` ruby
197 | module PageRankr
198 | class Backlinks
199 | class Bing
200 | def xpath
201 | "//my/new/awesome/@xpath"
202 | end
203 | end
204 | end
205 | end
206 | ```
207 |
208 | ## Extend it!
209 |
210 | If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr. PageRankr does this by looking up all the classes namespaced under Backlinks, Indexes, and Ranks.
211 |
212 | ``` ruby
213 | require 'page_rankr/backlink'
214 |
215 | module PageRankr
216 | class Backlinks
217 | class Foo
218 | include Backlink
219 |
220 | # This method is required
221 | def url
222 | "http://example.com/"
223 | end
224 |
225 | # This method specifies the parameters for the url. It is optional, but likely required for the class to be useful.
226 | def params
227 | {:q => tracked_url}
228 | end
229 |
230 | # You can use a method named either xpath, jsonpath, or regex with the appropriate query type
231 | def xpath
232 | "//backlinks/text()"
233 | end
234 |
235 | # Optionally, you could override the clean method if the current implementation isn't sufficient
236 | # def clean(backlink_count)
237 | # #do some of my own cleaning
238 | # super(backlink_count) # strips non-digits and converts it to an integer or nil
239 | # end
240 | end
241 | end
242 | end
243 |
244 | PageRankr::Backlinks::Foo.new("myawesomesite.com").run #=> 3
245 | PageRankr.backlinks("myawesomesite.com", :foo)[:foo] #=> 3
246 | ```
247 |
248 | Then, just make sure you require the class and PageRankr and whenever you call PageRankr.backlinks it'll be able to use your class.
249 |
250 | ## Note on Patches/Pull Requests
251 |
252 | * Fork the project.
253 | * Make your feature addition or bug fix.
254 | * Add tests for it. This is important so I don't break it in a future version unintentionally.
255 | * Commit, do not mess with rakefile, version, or history. (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
256 | * Send me a pull request. Bonus points for topic branches.
257 |
258 | ## TODO Version 5
259 | * Detect request throttling
260 |
261 | ## Shout Out
262 | Gotta give credit where credit's due!
263 |
264 | Original inspiration from:
265 |
266 | * [PageRankSharp](https://github.com/alexmipego/PageRankSharp)
267 | * [Google Page Range Lookup/](http://snipplr.com/view/18329/google-page-range-lookup/)
268 | * [AJAX PR Checker](http://www.sitetoolcenter.com/free-website-scripts/ajax-pr-checker.php)
269 |
270 | ## Copyright
271 |
272 | Copyright (c) 2010 Allen Madsen. See LICENSE for details.
273 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require 'bundler'
2 | Bundler::GemHelper.install_tasks
3 |
4 | require 'rspec/core/rake_task'
5 | RSpec::Core::RakeTask.new(:spec)
6 |
7 | task :default => :spec
8 |
9 | desc "Open an irb session preloaded with this library"
10 | task :console do
11 | sh "irb -rubygems -I lib -r page_rankr.rb"
12 | end
--------------------------------------------------------------------------------
/lib/PageRankr.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../page_rankr', __FILE__)
--------------------------------------------------------------------------------
/lib/page_rankr.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path("../page_rankr/backlinks", __FILE__)
2 | require File.expand_path("../page_rankr/ranks", __FILE__)
3 | require File.expand_path("../page_rankr/indexes", __FILE__)
4 | require File.expand_path("../page_rankr/proxy_services", __FILE__)
5 | require File.expand_path("../page_rankr/socials", __FILE__)
6 |
7 | module PageRankr
8 | class MethodRequired < StandardError; end
9 | class DomainInvalid < StandardError; end
10 | class SupportedComponentsInvalid < StandardError; end
11 |
12 | class << self
13 | attr_accessor :proxy_service
14 |
15 | def backlinks(site, *search_engines)
16 | Backlinks.new.lookup(Site.new(site), *search_engines)
17 | end
18 | alias_method :backlink, :backlinks
19 |
20 | def backlink_trackers
21 | Backlinks.new.backlink_trackers
22 | end
23 |
24 | def ranks(site, *rank_trackers)
25 | Ranks.new.lookup(Site.new(site), *rank_trackers)
26 | end
27 | alias_method :rank, :ranks
28 |
29 | def rank_trackers
30 | Ranks.new.rank_trackers
31 | end
32 |
33 | def indexes(site, *index_trackers)
34 | Indexes.new.lookup(Site.new(site), *index_trackers)
35 | end
36 | alias_method :index, :indexes
37 |
38 | def index_trackers
39 | Indexes.new.index_trackers
40 | end
41 |
42 | def socials(site, *social_trackers)
43 | Socials.new.lookup(Site.new(site), *social_trackers)
44 | end
45 | alias_method :social, :socials
46 |
47 | def social_trackers
48 | Socials.new.social_trackers
49 | end
50 | end
51 | end
--------------------------------------------------------------------------------
/lib/page_rankr/backlink.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../tracker', __FILE__)
2 |
3 | module PageRankr
4 | module Backlink
5 | include Tracker
6 |
7 | alias_method :backlink, :tracked
8 |
9 | def clean(raw)
10 | cleaned_content = super(raw)
11 | return nil if cleaned_content.nil? || cleaned_content.zero?
12 | cleaned_content
13 | end
14 | end
15 | end
--------------------------------------------------------------------------------
/lib/page_rankr/backlinks.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path("../trackers", __FILE__)
2 | require File.expand_path("../backlinks/bing", __FILE__)
3 | require File.expand_path("../backlinks/google", __FILE__)
4 | require File.expand_path("../backlinks/yahoo", __FILE__)
5 |
6 | module PageRankr
7 | class Backlinks
8 | include Trackers
9 |
10 | alias_method :backlink_trackers, :site_trackers
11 | end
12 | end
13 |
--------------------------------------------------------------------------------
/lib/page_rankr/backlinks/bing.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../backlink', __FILE__)
2 |
3 | module PageRankr
4 | class Backlinks
5 | class Bing
6 | include Backlink
7 |
8 | def url
9 | "http://www.bing.com/search"
10 | end
11 |
12 | def params
13 | {:q => "inbody:#{tracked_url}"}
14 | end
15 |
16 | def xpath
17 | "//span[@class='sb_count']/text()"
18 | end
19 |
20 | def clean(backlink_count)
21 | super(backlink_count.gsub('1-10', ''))
22 | end
23 |
24 | def name
25 | :backlinks_bing
26 | end
27 | end
28 | end
29 | end
--------------------------------------------------------------------------------
/lib/page_rankr/backlinks/google.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../backlink', __FILE__)
2 |
3 | module PageRankr
4 | class Backlinks
5 | class Google
6 | include Backlink
7 |
8 | def url
9 | "http://www.google.com/search"
10 | end
11 |
12 | def params
13 | {:q => "link:#{tracked_url}"}
14 | end
15 |
16 | def xpath
17 | "//div[@id='resultStats']/text()"
18 | end
19 |
20 | def name
21 | :backlinks_google
22 | end
23 | end
24 | end
25 | end
--------------------------------------------------------------------------------
/lib/page_rankr/backlinks/yahoo.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../backlink', __FILE__)
2 |
3 | module PageRankr
4 | class Backlinks
5 | class Yahoo
6 | include Backlink
7 |
8 | def url
9 | "http://search.yahoo.com/search"
10 | end
11 |
12 | def params
13 | {:p => "inbody:#{tracked_url}"}
14 | end
15 |
16 | def xpath
17 | "//div[@class='compPagination']/span/text()"
18 | end
19 |
20 | def name
21 | :backlinks_yahoo
22 | end
23 | end
24 | end
25 | end
26 |
--------------------------------------------------------------------------------
/lib/page_rankr/index.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../tracker', __FILE__)
2 |
3 | module PageRankr
4 | module Index
5 | include Tracker
6 |
7 | alias_method :index, :tracked
8 |
9 | def clean(raw)
10 | cleaned_content = super(raw)
11 | return nil if cleaned_content.nil? || cleaned_content.zero?
12 | cleaned_content
13 | end
14 | end
15 | end
--------------------------------------------------------------------------------
/lib/page_rankr/indexes.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path("../trackers", __FILE__)
2 | require File.expand_path("../indexes/bing", __FILE__)
3 | require File.expand_path("../indexes/google", __FILE__)
4 | require File.expand_path("../indexes/yahoo", __FILE__)
5 |
6 | module PageRankr
7 | class Indexes
8 | include Trackers
9 |
10 | alias_method :index_trackers, :site_trackers
11 | end
12 | end
--------------------------------------------------------------------------------
/lib/page_rankr/indexes/bing.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../index', __FILE__)
2 |
3 | module PageRankr
4 | class Indexes
5 | class Bing
6 | include Index
7 |
8 | def url
9 | "http://www.bing.com/search"
10 | end
11 |
12 | def params
13 | {:q => "site:#{tracked_url}"}
14 | end
15 |
16 | def xpath
17 | "//span[@class='sb_count']/text()"
18 | end
19 |
20 | def clean(backlink_count)
21 | super(backlink_count.gsub('1-10', ''))
22 | end
23 |
24 | def name
25 | :indexes_bing
26 | end
27 | end
28 | end
29 | end
--------------------------------------------------------------------------------
/lib/page_rankr/indexes/google.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../index', __FILE__)
2 |
3 | module PageRankr
4 | class Indexes
5 | class Google
6 | include Index
7 |
8 | def url
9 | "http://www.google.com/search"
10 | end
11 |
12 | def params
13 | {:q => "site:#{tracked_url}"}
14 | end
15 |
16 | def xpath
17 | "//div[@id='resultStats']/text()"
18 | end
19 |
20 | def name
21 | :indexes_google
22 | end
23 | end
24 | end
25 | end
--------------------------------------------------------------------------------
/lib/page_rankr/indexes/yahoo.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../index', __FILE__)
2 |
3 | module PageRankr
4 | class Indexes
5 | class Yahoo
6 | include Index
7 |
8 | def url
9 | "http://search.yahoo.com/search"
10 | end
11 |
12 | def params
13 | {:p => "site:#{tracked_url}"}
14 | end
15 |
16 | def xpath
17 | "//div[@class='compPagination']/span/text()"
18 | end
19 |
20 | def name
21 | :indexes_yahoo
22 | end
23 | end
24 | end
25 | end
26 |
--------------------------------------------------------------------------------
/lib/page_rankr/proxy_services.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path("../proxy_services/random", __FILE__)
2 | require File.expand_path("../proxy_services/round_robin", __FILE__)
3 |
4 | module ProxyServices
5 | end
--------------------------------------------------------------------------------
/lib/page_rankr/proxy_services/random.rb:
--------------------------------------------------------------------------------
1 | require 'uri'
2 |
3 | module PageRankr
4 | module ProxyServices
5 | class Random
6 | def initialize(proxies)
7 | @proxies = proxies
8 | end
9 |
10 | def proxy(name, site)
11 | @proxies[rand(@proxies.length)]
12 | end
13 | end
14 | end
15 | end
--------------------------------------------------------------------------------
/lib/page_rankr/proxy_services/round_robin.rb:
--------------------------------------------------------------------------------
1 | module PageRankr
2 | module ProxyServices
3 | class RoundRobin
4 | def initialize(proxies)
5 | @proxies = proxies
6 | @index = 0
7 | end
8 |
9 | def proxy(name, site)
10 | @proxies[@index].tap do
11 | @index = (@index + 1) % @proxies.length
12 | end
13 | end
14 | end
15 | end
16 | end
--------------------------------------------------------------------------------
/lib/page_rankr/rank.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../tracker', __FILE__)
2 |
3 | module PageRankr
4 | module Rank
5 | include Tracker
6 |
7 | alias_method :rank, :tracked
8 | end
9 | end
--------------------------------------------------------------------------------
/lib/page_rankr/ranks.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path("../trackers", __FILE__)
2 | require File.expand_path("../ranks/alexa_us", __FILE__)
3 | require File.expand_path("../ranks/alexa_global", __FILE__)
4 | require File.expand_path("../ranks/alexa_country", __FILE__)
5 | require File.expand_path("../ranks/google", __FILE__)
6 | require File.expand_path('../ranks/moz_rank', __FILE__)
7 | require File.expand_path('../ranks/domain_authority', __FILE__)
8 | require File.expand_path('../ranks/page_authority', __FILE__)
9 |
10 | module PageRankr
11 | class Ranks
12 | include Trackers
13 |
14 | alias_method :rank_trackers, :site_trackers
15 | end
16 | end
17 |
--------------------------------------------------------------------------------
/lib/page_rankr/ranks/alexa_country.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../rank', __FILE__)
2 |
3 | module PageRankr
4 | class Ranks
5 | class AlexaCountry
6 | include Rank
7 |
8 | def url
9 | "http://data.alexa.com/data"
10 | end
11 |
12 | def params
13 | {:cli => 10, :dat => "snbamz", :url => tracked_url}
14 | end
15 |
16 | # Alexa may sometimes return a result for the incorrect site and thus it is necessary to check if
17 | # the results returned are for the site we want.
18 | #
19 | # For example, slocourts.net returns results for ca.gov, presumably because www.slocourts.ca.gov redirects
20 | # to slocourts.net. Clearly something is wrong with how Alexa handles this case and so in the event this
21 | # happens we treat the results as if there were no results.
22 | def xpath
23 | "//country/@rank"
24 | end
25 |
26 | def supported_components
27 | [:subdomain]
28 | end
29 |
30 | def name
31 | :ranks_alexa_country
32 | end
33 | end
34 | end
35 | end
36 |
--------------------------------------------------------------------------------
/lib/page_rankr/ranks/alexa_global.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../rank', __FILE__)
2 |
3 | module PageRankr
4 | class Ranks
5 | class AlexaGlobal
6 | include Rank
7 |
8 | def url
9 | "http://data.alexa.com/data"
10 | end
11 |
12 | def params
13 | {:cli => 10, :dat => "snbamz", :url => tracked_url}
14 | end
15 |
16 | # Alexa may sometimes return a result for the incorrect site and thus it is necessary to check if
17 | # the results returned are for the site we want.
18 | #
19 | # For example, slocourts.net returns results for ca.gov, presumably because www.slocourts.ca.gov redirects
20 | # to slocourts.net. Clearly something is wrong with how Alexa handles this case and so in the event this
21 | # happens we treat the results as if there were no results.
22 | def xpath
23 | "//popularity/@text"
24 | end
25 |
26 | def supported_components
27 | [:subdomain]
28 | end
29 |
30 | def name
31 | :ranks_alexa_global
32 | end
33 | end
34 | end
35 | end
36 |
--------------------------------------------------------------------------------
/lib/page_rankr/ranks/alexa_us.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../rank', __FILE__)
2 |
3 | module PageRankr
4 | class Ranks
5 | class AlexaUs
6 | include Rank
7 |
8 | def url
9 | "http://data.alexa.com/data"
10 | end
11 |
12 | def params
13 | {:cli => 10, :dat => "snbamz", :url => tracked_url}
14 | end
15 |
16 | # Alexa may sometimes return a result for the incorrect site and thus it is necessary to check if
17 | # the results returned are for the site we want.
18 | #
19 | # For example, slocourts.net returns results for ca.gov, presumably because www.slocourts.ca.gov redirects
20 | # to slocourts.net. Clearly something is wrong with how Alexa handles this case and so in the event this
21 | # happens we treat the results as if there were no results.
22 | def xpath
23 | "//country[@code='US']/@rank"
24 | end
25 |
26 | def supported_components
27 | [:subdomain]
28 | end
29 |
30 | def name
31 | :ranks_alexa_us
32 | end
33 | end
34 | end
35 | end
36 |
--------------------------------------------------------------------------------
/lib/page_rankr/ranks/domain_authority.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../rank', __FILE__)
2 |
3 | module PageRankr
4 | class Ranks
5 | class DomainAuthority
6 | include Rank
7 |
8 | def url
9 | 'https://moz.com/researchtools/ose/api/urlmetrics'
10 | end
11 |
12 | def params
13 | {:site => tracked_url}
14 | end
15 |
16 | def jsonpath
17 | 'data.authority.domain_authority'
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :query]
22 | end
23 |
24 | def name
25 | :ranks_domain_authority
26 | end
27 | end
28 | end
29 | end
30 |
--------------------------------------------------------------------------------
/lib/page_rankr/ranks/google.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../rank', __FILE__)
2 | require File.expand_path('../google/checksum', __FILE__)
3 |
4 | module PageRankr
5 | class Ranks
6 | class Google
7 | include Rank
8 |
9 | def initialize(site, options = {})
10 | @site = PageRankr::Site(site)
11 | @checksum = Checksum.generate("info:#{tracked_url}")
12 |
13 | super(site, options)
14 | end
15 |
16 | def supported_components
17 | [:subdomain, :path, :query]
18 | end
19 |
20 | def url
21 | "http://toolbarqueries.google.com/tbr"
22 | end
23 |
24 | def params
25 | {:client => "navclient-auto", :ch => @checksum, :features => "Rank", :q => "info:#{tracked_url}"}
26 | end
27 |
28 | def regex
29 | /Rank_\d+:\d+:(\d+)/
30 | end
31 |
32 | def name
33 | :ranks_google
34 | end
35 | end
36 | end
37 | end
38 |
--------------------------------------------------------------------------------
/lib/page_rankr/ranks/google/checksum.rb:
--------------------------------------------------------------------------------
1 | module PageRankr
2 | class Ranks
3 | class Google
4 | class Checksum
5 | class << self
6 | def generate(site)
7 | bytes = byte_array(site)
8 | length = bytes.length
9 | a = b = 0x9E3779B9
10 | c = 0xE6359A60
11 |
12 | k, len = 0, length
13 | while(len >= 12)
14 | a, b, c = mix(*shift(a, b, c, k, bytes))
15 | k += 12
16 | len -= 12
17 | end
18 |
19 | c = c + length
20 |
21 | c = mix(*toss(a, b, c, bytes, len, k))[2]
22 | "6" + c.to_s
23 | end
24 |
25 | private
26 |
27 | def byte_array(site)
28 | bytes = []
29 | site.each_byte {|b| bytes << b}
30 | bytes
31 | end
32 |
33 | # Need to keep numbers in the unsigned int 32 range
34 | def m(v)
35 | v % 0x100000000
36 | end
37 |
38 | def shift(a, b, c, k, bytes)
39 | a = m(a + bytes[k + 0] + (bytes[k + 1] << 8) + (bytes[k + 2] << 16) + (bytes[k + 3] << 24))
40 | b = m(b + bytes[k + 4] + (bytes[k + 5] << 8) + (bytes[k + 6] << 16) + (bytes[k + 7] << 24))
41 | c = m(c + bytes[k + 8] + (bytes[k + 9] << 8) + (bytes[k + 10] << 16) + (bytes[k + 11] << 24))
42 |
43 | [a, b, c]
44 | end
45 |
46 | def mix(a, b, c)
47 | a, b, c = m(a), m(b), m(c)
48 |
49 | a = m(a-b-c) ^ m(c >> 13)
50 | b = m(b-c-a) ^ m(a << 8)
51 | c = m(c-a-b) ^ m(b >> 13)
52 |
53 | a = m(a-b-c) ^ m(c >> 12)
54 | b = m(b-c-a) ^ m(a << 16)
55 | c = m(c-a-b) ^ m(b >> 5)
56 |
57 | a = m(a-b-c) ^ m(c >> 3)
58 | b = m(b-c-a) ^ m(a << 10)
59 | c = m(c-a-b) ^ m(b >> 15)
60 |
61 | [a, b, c]
62 | end
63 |
64 | def toss(a, b, c, bytes, len, k)
65 | case len
66 | when 9..11
67 | c = c + (bytes[k+len-1] << ((len % 8) * 8))
68 | when 5..8
69 | b = b + (bytes[k+len-1] << ((len % 5) * 8))
70 | when 1..4
71 | a = a + (bytes[k+len-1] << ((len - 1) * 8))
72 | else
73 | return [a, b, c]
74 | end
75 | toss(a, b, c, bytes, len-1, k)
76 | end
77 | end
78 | end
79 | end
80 | end
81 | end
--------------------------------------------------------------------------------
/lib/page_rankr/ranks/moz_rank.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../rank', __FILE__)
2 |
3 | module PageRankr
4 | class Ranks
5 | class MozRank
6 | include Rank
7 |
8 | def url
9 | 'http://bagics.com/moz-rank.html'
10 | end
11 |
12 | def params
13 | {:domain => tracked_url}
14 | end
15 |
16 | def xpath
17 | '//*[@id="resId"]'
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :query]
22 | end
23 |
24 | def name
25 | :ranks_moz_rank
26 | end
27 | end
28 | end
29 | end
--------------------------------------------------------------------------------
/lib/page_rankr/ranks/page_authority.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../rank', __FILE__)
2 |
3 | module PageRankr
4 | class Ranks
5 | class PageAuthority
6 | include Rank
7 |
8 | def url
9 | 'https://moz.com/researchtools/ose/api/urlmetrics'
10 | end
11 |
12 | def params
13 | {site: tracked_url}
14 | end
15 |
16 | def jsonpath
17 | 'data.authority.page_authority'
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :query]
22 | end
23 |
24 | def name
25 | :ranks_page_authority
26 | end
27 | end
28 | end
29 | end
30 |
--------------------------------------------------------------------------------
/lib/page_rankr/request.rb:
--------------------------------------------------------------------------------
1 | require 'httparty'
2 |
3 | module PageRankr
4 | class Request
5 | def initialize(tracker, options)
6 | @tracker = tracker
7 | @options = options
8 | end
9 |
10 | def perform
11 | method = tracker.method
12 | url = tracker.url
13 |
14 | response = HTTParty.send(method, url, construct_options(tracker))
15 | yield response.body if block_given?
16 | end
17 |
18 | private
19 | attr_reader :tracker
20 |
21 | def construct_options(tracker)
22 | proxy = tracker.proxy
23 | params = tracker.params if tracker.respond_to?(:params)
24 |
25 | options = default_options
26 | options.merge!({
27 | :http_proxyaddr => proxy.host,
28 | :http_proxyport => proxy.port,
29 | :http_proxyuser => proxy.user,
30 | :http_proxypass => proxy.password
31 | }) if proxy
32 | options.merge!({:query => params}) if params
33 | options.merge!(@options)
34 | end
35 |
36 | def default_options
37 | {
38 | :headers => {
39 | 'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML, like Gecko) Version/5.1.6 Safari/534.56.5'
40 | }
41 | }
42 | end
43 | end
44 | end
--------------------------------------------------------------------------------
/lib/page_rankr/site.rb:
--------------------------------------------------------------------------------
1 | require 'public_suffix'
2 | require 'delegate'
3 | require 'addressable/uri'
4 |
5 | module PageRankr
6 | class Site
7 | COMPONENTS = [:scheme, :subdomain, :domain, :port, :path, :query, :fragment]
8 |
9 | def initialize(site)
10 | site = "http://#{site}" unless site =~ /:\/\//
11 | @uri = Addressable::URI.parse(site)
12 | @domain = PublicSuffix.parse(@uri.host || "")
13 |
14 | @domain.valid? or raise DomainInvalid, "The domain provided is invalid.1"
15 | rescue PublicSuffix::DomainInvalid, Addressable::URI::InvalidURIError
16 | raise DomainInvalid, "The domain provided is invalid."
17 | end
18 |
19 | def scheme
20 | @uri.scheme
21 | end
22 |
23 | def domain
24 | @domain.domain
25 | end
26 |
27 | def subdomain
28 | @domain.subdomain or domain
29 | end
30 |
31 | def port
32 | @uri.port
33 | end
34 |
35 | def path
36 | @uri.path
37 | end
38 |
39 | def query
40 | @uri.query
41 | end
42 |
43 | def fragment
44 | @uri.fragment
45 | end
46 |
47 | def url(supported_components = [:domain])
48 | components = COMPONENTS & supported_components #get ordered list
49 |
50 | unless components.include?(:subdomain) ^ components.include?(:domain)
51 | raise SupportedComponentsInvalid, "Either subdomain or domain should be set as a supported component, not both."
52 | end
53 |
54 | components.inject("") do |url, component|
55 | url + case component
56 | when :scheme
57 | scheme and "#{scheme}://" or ""
58 | when :domain
59 | domain
60 | when :subdomain
61 | subdomain
62 | when :port
63 | port == @uri.default_port and "" or ":#{port}"
64 | when :path
65 | path or ""
66 | when :query
67 | query and "?#{query}" or ""
68 | when :fragment
69 | fragment and "##{fragment}" or ""
70 | end
71 | end
72 | end
73 | end
74 |
75 | class << self
76 | def Site(site)
77 | site.respond_to?(:url) ? site : Site.new(site)
78 | end
79 | end
80 | end
81 |
--------------------------------------------------------------------------------
/lib/page_rankr/social.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../social', __FILE__)
2 |
3 | module PageRankr
4 | module Social
5 | include Tracker
6 |
7 | alias_method :social, :tracked
8 | end
9 | end
--------------------------------------------------------------------------------
/lib/page_rankr/socials.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path("../trackers", __FILE__)
2 | require File.expand_path("../socials/facebook", __FILE__)
3 | require File.expand_path("../socials/google", __FILE__)
4 | require File.expand_path("../socials/linkedin", __FILE__)
5 | require File.expand_path("../socials/pinterest", __FILE__)
6 | require File.expand_path("../socials/stumble_upon", __FILE__)
7 | require File.expand_path("../socials/twitter", __FILE__)
8 | require File.expand_path("../socials/vk", __FILE__)
9 |
10 | module PageRankr
11 | class Socials
12 | include Trackers
13 |
14 | alias_method :social_trackers, :site_trackers
15 | end
16 | end
17 |
--------------------------------------------------------------------------------
/lib/page_rankr/socials/facebook.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../social', __FILE__)
2 |
3 | module PageRankr
4 | class Socials
5 | class Facebook
6 | include Social
7 |
8 | def url
9 | 'http://graph.facebook.com'
10 | end
11 |
12 | def params
13 | {:id => tracked_url}
14 | end
15 |
16 | def jsonpath
17 | 'shares'
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :query, :scheme]
22 | end
23 |
24 | def name
25 | :socials_facebook
26 | end
27 | end
28 | end
29 | end
30 |
--------------------------------------------------------------------------------
/lib/page_rankr/socials/google.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../social', __FILE__)
2 |
3 | module PageRankr
4 | class Socials
5 | class Google
6 | include Social
7 |
8 | def url
9 | # Yandex supplies Google +1 counts in a convenient matter without need for an API key
10 | 'http://share.yandex.ru/gpp.xml'
11 | end
12 |
13 | def params
14 | {:url => tracked_url}
15 | end
16 |
17 | def regex
18 | /(\d+)/
19 | end
20 |
21 | def supported_components
22 | [:subdomain, :path, :scheme]
23 | end
24 |
25 | def name
26 | :socials_google
27 | end
28 | end
29 | end
30 | end
31 |
--------------------------------------------------------------------------------
/lib/page_rankr/socials/linkedin.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../social', __FILE__)
2 |
3 | module PageRankr
4 | class Socials
5 | class LinkedIn
6 | include Social
7 |
8 | def url
9 | 'https://www.linkedin.com/countserv/count/share'
10 | end
11 |
12 | def params
13 | {:url => tracked_url, :callback => '_', :format => 'json'}
14 | end
15 |
16 | def jsonpath
17 | 'count'
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :query]
22 | end
23 |
24 | def name
25 | :socials_linked_in
26 | end
27 | end
28 | end
29 | end
30 |
--------------------------------------------------------------------------------
/lib/page_rankr/socials/pinterest.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../social', __FILE__)
2 |
3 | module PageRankr
4 | class Socials
5 | class Pinterest
6 | include Social
7 |
8 | def url
9 | 'http://api.pinterest.com/v1/urls/count.json'
10 | end
11 |
12 | def params
13 | {:url => tracked_url, :callback => '_'}
14 | end
15 |
16 | def regex
17 | /(\d+)/
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :scheme]
22 | end
23 |
24 | def name
25 | :socials_pinterest
26 | end
27 | end
28 | end
29 | end
30 |
--------------------------------------------------------------------------------
/lib/page_rankr/socials/stumble_upon.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../social', __FILE__)
2 |
3 | module PageRankr
4 | class Socials
5 | class StumbleUpon
6 | include Social
7 |
8 | def url
9 | 'http://www.stumbleupon.com/services/1.01/badge.getinfo'
10 | end
11 |
12 | def params
13 | {:url => tracked_url}
14 | end
15 |
16 | def jsonpath
17 | 'result.views'
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :query]
22 | end
23 |
24 | def name
25 | :socials_stumble_upon
26 | end
27 | end
28 | end
29 | end
30 |
--------------------------------------------------------------------------------
/lib/page_rankr/socials/twitter.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../social', __FILE__)
2 |
3 | module PageRankr
4 | class Socials
5 | class Twitter
6 | include Social
7 |
8 | def url
9 | 'http://urls.api.twitter.com/1/urls/count.json'
10 | end
11 |
12 | def params
13 | {:url => tracked_url}
14 | end
15 |
16 | def jsonpath
17 | 'count'
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :query]
22 | end
23 |
24 | def name
25 | :socials_twitter
26 | end
27 | end
28 | end
29 | end
30 |
--------------------------------------------------------------------------------
/lib/page_rankr/socials/vk.rb:
--------------------------------------------------------------------------------
1 | require File.expand_path('../../social', __FILE__)
2 |
3 | module PageRankr
4 | class Socials
5 | class Vk
6 | include Social
7 |
8 | def url
9 | 'http://vk.com/share.php'
10 | end
11 |
12 | def params
13 | {:url => tracked_url, :act => 'count'}
14 | end
15 |
16 | def regex
17 | /, (\d+)/
18 | end
19 |
20 | def supported_components
21 | [:subdomain, :path, :scheme]
22 | end
23 |
24 | def name
25 | :socials_vk
26 | end
27 | end
28 | end
29 | end
30 |
--------------------------------------------------------------------------------
/lib/page_rankr/tracker.rb:
--------------------------------------------------------------------------------
1 | require 'nokogiri'
2 | require 'json'
3 | require 'jsonpath'
4 | require 'uri'
5 | require File.expand_path('../site', __FILE__)
6 | require File.expand_path('../request', __FILE__)
7 |
8 | module PageRankr
9 |   # Behaviour shared by all trackers: request the service url, extract a value
10 |   # from the response and reduce it to an Integer.
11 |   #
12 |   # Including classes have to define +url+ and +name+, plus one of +xpath+,
13 |   # +jsonpath+ or +regex+ to say how the value is extracted.
14 |   module Tracker
15 |     # tracked - cleaned value of the last run.
16 |     # raw     - extracted text before cleaning.
17 |     # body    - response body of the last run.
18 |     attr_accessor :tracked, :raw, :body
19 |
20 |     # site    - a Site, or anything PageRankr::Site() can turn into one.
21 |     # options - hash handed on to PageRankr::Request.
22 |     def initialize(site, options = {})
23 |       @site = PageRankr::Site(site)
24 |       @options = options
25 |
26 |       # Initialize proxy, so threads don't need to synchronize the proxy service.
27 |       proxy
28 |     end
29 |
30 |     # Has to be overridden; the default raises MethodRequired.
31 |     def url
32 |       raise PageRankr::MethodRequired, "A url method defining the url to the service with the value you wish to extract must be defined."
33 |     end
34 |
35 |     # Has to be overridden; the default raises MethodRequired.
36 |     def name
37 |       raise PageRankr::MethodRequired, "name is undefined for #{self.class.name}"
38 |     end
39 |
40 |     # Site url restricted to the components this tracker supports.
41 |     def tracked_url
42 |       @site.url(supported_components)
43 |     end
44 |
45 |     # Components used to build #tracked_url; trackers override this as needed.
46 |     def supported_components
47 |       [:subdomain]
48 |     end
49 |
50 |     # HTTP method used for the request.
51 |     def method
52 |       :get
53 |     end
54 |
55 |     # Parsed proxy uri obtained from the configured proxy service, memoized per
56 |     # tracker. Returns nil when no proxy service is configured.
57 |     def proxy
58 |       return unless PageRankr.proxy_service
59 |
60 |       @proxy ||= URI.parse(PageRankr.proxy_service.proxy(name, @site))
61 |     end
62 |
63 |     # Performs the request, stores body, raw and tracked, and returns tracked.
64 |     def run
65 |       PageRankr::Request.new(self, @options).perform do |response_body|
66 |         self.body = response_body
67 |         self.raw = content(response_body)
68 |         self.tracked = clean(raw)
69 |       end
70 |
71 |       tracked
72 |     end
73 |
74 |     # Extracts the value from +body+ as a String, using the first extractor the
75 |     # tracker defines: xpath, then jsonpath, then regex (first capture group).
76 |     #
77 |     # Raises MethodRequired when none of the three is defined.
78 |     def content(body)
79 |       extracted =
80 |         if respond_to?(:xpath)
81 |           Nokogiri::HTML(body).at(xpath)
82 |         elsif respond_to?(:jsonpath)
83 |           JsonPath.new(jsonpath).first(JSON.parse(body))
84 |         elsif respond_to?(:regex)
85 |           $1 if body =~ regex
86 |         else
87 |           raise PageRankr::MethodRequired, "A method for extracting the value must be defined. Either xpath, jsonpath, or regex."
88 |         end
89 |
90 |       extracted.to_s
91 |     end
92 |
93 |     # Removes every non-digit character from +content+.
94 |     #
95 |     # Returns the remaining digits as an Integer, or nil when none remain.
96 |     def clean(content)
97 |       digits = content.to_s.gsub(/\D/, '')
98 |
99 |       digits.strip == '' ? nil : digits.to_i
100 |     end
101 |   end
102 | end
--------------------------------------------------------------------------------
/lib/page_rankr/trackers.rb:
--------------------------------------------------------------------------------
1 | require 'thread'
2 |
3 | module PageRankr
4 | module Trackers
5 | attr_accessor :site_trackers
6 |
7 | def initialize
8 | @site_trackers = self.class.constants.collect{|tracker| symbol_for(tracker)}
9 | end
10 |
11 | def lookup(site, *trackers)
12 | trackers = site_trackers if trackers.empty?
13 |
14 | tracked = trackers.map do |tracker|
15 | name, klass = constant_name(tracker), self.class
16 |
17 | next unless klass.const_defined? name
18 |
19 | [
20 | tracker,
21 | build_thread(tracker, klass.const_get(name), site)
22 | ]
23 | end.each do |_, thread|
24 | thread.join
25 | end.map do |tracker, thread|
26 | [tracker, thread.value]
27 | end
28 |
29 | Hash[tracked]
30 | end
31 |
32 | private
33 |
34 | def build_thread(tracker, instance, site)
35 | Thread.new(tracker, instance, site) do |t, i, s|
36 | i.new(s).run
37 | end
38 | end
39 |
40 | def symbol_for(klass)
41 | word = klass.to_s.dup
42 | word.gsub!(/([A-Z]+)([A-Z][a-z])/){|match| "#{$1}_#{$2}" }
43 | word.gsub!(/([a-z\d])([A-Z])/){|match| "#{$1}_#{$2}" }
44 | word.tr!("-", "_")
45 | word.downcase!
46 | word.to_sym
47 | end
48 |
49 | def constant_name(sym)
50 | sym.to_s.split('_').collect{|str| str.capitalize}.join
51 | end
52 | end
53 | end
--------------------------------------------------------------------------------
/lib/page_rankr/version.rb:
--------------------------------------------------------------------------------
1 | module PageRankr
2 | VERSION = "4.6.1"
3 | end
4 |
--------------------------------------------------------------------------------
/out.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blatyo/page_rankr/9d7de0c7df9dd15a9650f177dfda5ac82744a2bb/out.html
--------------------------------------------------------------------------------
/spec/backlinks/bing_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe PageRankr::Backlinks::Bing do
4 | describe '#run' do
5 | let(:tracker){described_class.new(site)}
6 | subject(:result){tracker.run}
7 |
8 | context 'with match', :vcr do
9 | let(:site){'http://www.google.com'}
10 |
11 | it{is_expected.to be_number > 0}
12 | end
13 |
14 | context 'with no match', :vcr do
15 | let(:site){'http://please-dont-register-a-site-that-breaks-this-test.com'}
16 |
17 | # currently matches suggested results
18 | it{is_expected.to be_number < 2}
19 | end
20 | end
21 | end
22 |
--------------------------------------------------------------------------------
/spec/backlinks/google_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe PageRankr::Backlinks::Google do
4 | describe '#run' do
5 | let(:tracker){described_class.new(site)}
6 | subject(:result){tracker.run}
7 |
8 | context 'with match', :vcr do
9 | let(:site){'http://www.google.com'}
10 |
11 | it{is_expected.to be_number > 0}
12 | end
13 |
14 | context 'with no match', :vcr do
15 | let(:site){'http://please-dont-register-a-site-that-breaks-this-test.com'}
16 |
17 | it{is_expected.to be_nil}
18 | end
19 | end
20 | end
21 |
--------------------------------------------------------------------------------
/spec/backlinks/yahoo_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe PageRankr::Backlinks::Yahoo do
4 | describe '#run' do
5 | let(:tracker){described_class.new(site)}
6 | subject(:result){tracker.run}
7 |
8 | context 'with match', :vcr do
9 | let(:site){'http://www.google.com'}
10 |
11 | it{is_expected.to be_number > 0}
12 | end
13 |
14 | context 'with no match', :vcr do
15 | let(:site){'http://please-dont-register-a-site-that-breaks-this-test.com'}
16 |
17 | # currently matches suggested results
18 | it{is_expected.to be_number < 2}
19 | end
20 | end
21 | end
22 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_AlexaCountry/_run/with_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://data.alexa.com/data?cli=10&dat=snbamz&url=www.google.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Content-Type:
19 | - text/xml
20 | Date:
21 | - Tue, 15 Sep 2015 00:15:13 GMT
22 | Server:
23 | - nginx
24 | Content-Length:
25 | - '983'
26 | Connection:
27 | - keep-alive
28 | body:
29 | encoding: UTF-8
30 | string: "\r\n\r\n\r\n\r\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
43 | http_version:
44 | recorded_at: Tue, 15 Sep 2015 00:15:13 GMT
45 | recorded_with: VCR 2.9.3
46 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_AlexaCountry/_run/with_no_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://data.alexa.com/data?cli=10&dat=snbamz&url=please-dont-register-a-site-that-breaks-this-test.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Content-Type:
19 | - text/xml
20 | Date:
21 | - Tue, 15 Sep 2015 00:15:13 GMT
22 | Server:
23 | - nginx
24 | Content-Length:
25 | - '422'
26 | Connection:
27 | - keep-alive
28 | body:
29 | encoding: UTF-8
30 | string: "\r\n\r\n\r\n\r\n\n\n\n\n\n"
35 | http_version:
36 | recorded_at: Tue, 15 Sep 2015 00:15:13 GMT
37 | recorded_with: VCR 2.9.3
38 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_AlexaGlobal/_run/with_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://data.alexa.com/data?cli=10&dat=snbamz&url=www.google.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Content-Type:
19 | - text/xml
20 | Date:
21 | - Tue, 15 Sep 2015 00:15:14 GMT
22 | Server:
23 | - nginx
24 | Content-Length:
25 | - '983'
26 | Connection:
27 | - keep-alive
28 | body:
29 | encoding: UTF-8
30 | string: "\r\n\r\n\r\n\r\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
43 | http_version:
44 | recorded_at: Tue, 15 Sep 2015 00:15:15 GMT
45 | recorded_with: VCR 2.9.3
46 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_AlexaGlobal/_run/with_no_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://data.alexa.com/data?cli=10&dat=snbamz&url=please-dont-register-a-site-that-breaks-this-test.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Content-Type:
19 | - text/xml
20 | Date:
21 | - Tue, 15 Sep 2015 00:15:15 GMT
22 | Server:
23 | - nginx
24 | Content-Length:
25 | - '422'
26 | Connection:
27 | - keep-alive
28 | body:
29 | encoding: UTF-8
30 | string: "\r\n\r\n\r\n\r\n\n\n\n\n\n"
35 | http_version:
36 | recorded_at: Tue, 15 Sep 2015 00:15:15 GMT
37 | recorded_with: VCR 2.9.3
38 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_AlexaUs/_run/with_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://data.alexa.com/data?cli=10&dat=snbamz&url=www.google.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Content-Type:
19 | - text/xml
20 | Date:
21 | - Tue, 15 Sep 2015 00:15:15 GMT
22 | Server:
23 | - nginx
24 | Content-Length:
25 | - '983'
26 | Connection:
27 | - keep-alive
28 | body:
29 | encoding: UTF-8
30 | string: "\r\n\r\n\r\n\r\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
43 | http_version:
44 | recorded_at: Tue, 15 Sep 2015 00:15:15 GMT
45 | recorded_with: VCR 2.9.3
46 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_AlexaUs/_run/with_no_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://data.alexa.com/data?cli=10&dat=snbamz&url=please-dont-register-a-site-that-breaks-this-test.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Content-Type:
19 | - text/xml
20 | Date:
21 | - Tue, 15 Sep 2015 00:15:15 GMT
22 | Server:
23 | - nginx
24 | Content-Length:
25 | - '422'
26 | Connection:
27 | - keep-alive
28 | body:
29 | encoding: UTF-8
30 | string: "\r\n\r\n\r\n\r\n\n\n\n\n\n"
35 | http_version:
36 | recorded_at: Tue, 15 Sep 2015 00:15:15 GMT
37 | recorded_with: VCR 2.9.3
38 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_DomainAuthority/_run/with_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: https://moz.com/researchtools/ose/api/urlmetrics?site=www.google.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Server:
19 | - openresty
20 | Date:
21 | - Mon, 14 Sep 2015 22:46:44 GMT
22 | Content-Type:
23 | - application/json; charset=utf-8
24 | Content-Length:
25 | - '388'
26 | Connection:
27 | - keep-alive
28 | Vary:
29 | - Accept-Encoding
30 | Cache-Control:
31 | - public, max-age=2592000
32 | Etag:
33 | - W/"184-70cce3fe"
34 | X-Response-Time:
35 | - 6.033ms
36 | Set-Cookie:
37 | - "___utmvaFIuocZF=pZi\x01owqr; path=/; Max-Age=900"
38 | - '___utmvbFIuocZF=OZb XgeOialS: Gtu; path=/; Max-Age=900'
39 | - ___utmvmFIuocZF=TXoKwTqtWRw; path=/; Max-Age=900
40 | - incap_ses_163_133232=4hUcGuFA1xKr4XA23RdDAlRO91UAAAAA0ZRJUCXlJX/7NMB47f2gag==;
41 | path=/; Domain=.moz.com
42 | - visid_incap_133232=VtUxXUItTBODKSDISQkgRFRO91UAAAAAQUIPAAAAAAD8I9a8Sh2mvcMV6EUFcScc;
43 | expires=Wed, 13 Sep 2017 08:26:24 GMT; path=/; Domain=.moz.com
44 | X-Iinfo:
45 | - 10-169986278-169986286 NNNN CT(45 47 0) RT(1442270804309 98) q(0 0 1 1) r(1
46 | 1) U5
47 | X-Cdn:
48 | - Incapsula
49 | body:
50 | encoding: UTF-8
51 | string: '{"meta":{"site":"http://www.google.com","requested_site":"http://www.google.com"},"data":{"spam_flags":[false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false],"spam_score":1,"authority":{"page_authority":97.4974451586309,"domain_authority":100},"page":{"just_discovered_links":146644,"linking_root_domains":915522,"inbound_links":75133768}}}'
52 | http_version:
53 | recorded_at: Mon, 14 Sep 2015 22:46:45 GMT
54 | recorded_with: VCR 2.9.3
55 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_DomainAuthority/_run/with_no_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: https://moz.com/researchtools/ose/api/urlmetrics?site=please-dont-register-a-site-that-breaks-this-test.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Server:
19 | - openresty
20 | Date:
21 | - Mon, 14 Sep 2015 22:46:45 GMT
22 | Content-Type:
23 | - application/json; charset=utf-8
24 | Content-Length:
25 | - '419'
26 | Connection:
27 | - keep-alive
28 | Vary:
29 | - Accept-Encoding
30 | Cache-Control:
31 | - public, max-age=2592000
32 | Etag:
33 | - W/"1a3-9dde57ca"
34 | X-Response-Time:
35 | - 319.883ms
36 | Set-Cookie:
37 | - "___utmvaFIuocZF=wsd\x01YNUr; path=/; Max-Age=900"
38 | - '___utmvbFIuocZF=gZc XNXOFalE: dtx; path=/; Max-Age=900'
39 | - ___utmvmFIuocZF=THamZBMugvr; path=/; Max-Age=900
40 | - incap_ses_163_133232=6eaOG/TqHgWr4XA23RdDAlVO91UAAAAA+qhScMrQqg3BqLlWHVjEkQ==;
41 | path=/; Domain=.moz.com
42 | - visid_incap_133232=VtUxXUItTBODKSDISQkgRFRO91UAAAAAQUIPAAAAAAD8I9a8Sh2mvcMV6EUFcScc;
43 | expires=Wed, 13 Sep 2017 08:26:24 GMT; path=/; Domain=.moz.com
44 | X-Iinfo:
45 | - 4-47997076-47997080 NNNN CT(46 48 0) RT(1442270804655 83) q(0 0 1 0) r(5 5)
46 | U5
47 | X-Cdn:
48 | - Incapsula
49 | body:
50 | encoding: UTF-8
51 | string: '{"meta":{"site":"http://please-dont-register-a-site-that-breaks-this-test.com","requested_site":"http://please-dont-register-a-site-that-breaks-this-test.com"},"data":{"spam_flags":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],"spam_score":null,"authority":{"page_authority":1,"domain_authority":1},"page":{"just_discovered_links":0,"linking_root_domains":0,"inbound_links":0}}}'
52 | http_version:
53 | recorded_at: Mon, 14 Sep 2015 22:46:45 GMT
54 | recorded_with: VCR 2.9.3
55 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_Google/_run/with_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://toolbarqueries.google.com/tbr?ch=6340563836&client=navclient-auto&features=Rank&q=info:www.google.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Date:
19 | - Mon, 14 Sep 2015 22:48:49 GMT
20 | Pragma:
21 | - no-cache
22 | Expires:
23 | - Fri, 01 Jan 1990 00:00:00 GMT
24 | Cache-Control:
25 | - no-cache, must-revalidate
26 | Content-Type:
27 | - text/html; charset=ISO-8859-1
28 | P3p:
29 | - CP="This is not a P3P policy! See http://www.google.com/support/accounts/bin/answer.py?hl=en&answer=151657
30 | for more info."
31 | Server:
32 | - gws
33 | X-Xss-Protection:
34 | - 1; mode=block
35 | X-Frame-Options:
36 | - SAMEORIGIN
37 | Set-Cookie:
38 | - NID=71=kTGtQ1IqZuZWhoyxJOUkspDh_UBuf3k_wNZCNH48fhizm_Q5VqUlU559bfnbdahaKPOVhbil0dHT6ucsaLqCwLAphm0KFi5ujD4ZYu7s1vJ0K1bM4LF2Lqr2UROZ54q5;
39 | expires=Tue, 15-Mar-2016 22:48:49 GMT; path=/; domain=.google.com; HttpOnly
40 | - PREF=ID=1111111111111111:FF=0:TM=1442270929:LM=1442270929:V=1:S=cqgjcx1zCfeB7wRq;
41 | expires=Thu, 31-Dec-2015 16:02:17 GMT; path=/; domain=.google.com
42 | Accept-Ranges:
43 | - none
44 | Vary:
45 | - Accept-Encoding
46 | Transfer-Encoding:
47 | - chunked
48 | body:
49 | encoding: UTF-8
50 | string: |
51 | Rank_1:1:9
52 | http_version:
53 | recorded_at: Mon, 14 Sep 2015 22:48:49 GMT
54 | recorded_with: VCR 2.9.3
55 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_Google/_run/with_no_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://toolbarqueries.google.com/tbr?ch=63624986523&client=navclient-auto&features=Rank&q=info:please-dont-register-a-site-that-breaks-this-test.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Date:
19 | - Mon, 14 Sep 2015 22:48:49 GMT
20 | Pragma:
21 | - no-cache
22 | Expires:
23 | - Fri, 01 Jan 1990 00:00:00 GMT
24 | Cache-Control:
25 | - no-cache, must-revalidate
26 | Content-Type:
27 | - text/html; charset=ISO-8859-1
28 | P3p:
29 | - CP="This is not a P3P policy! See http://www.google.com/support/accounts/bin/answer.py?hl=en&answer=151657
30 | for more info."
31 | Server:
32 | - gws
33 | Content-Length:
34 | - '0'
35 | X-Xss-Protection:
36 | - 1; mode=block
37 | X-Frame-Options:
38 | - SAMEORIGIN
39 | Set-Cookie:
40 | - NID=71=G3F4fqx4te-5FRSqGqkW-BAM7X2grah41e6bAzWdKGSunS_j-TZ3yD2AZ7zymGaiTyVCIapK7-82fwwz2QxGe0PIIAv1ts95HWOcDC2I2HowKJz_uE5ZLLJJmWHU6JJh;
41 | expires=Tue, 15-Mar-2016 22:48:49 GMT; path=/; domain=.google.com; HttpOnly
42 | - PREF=ID=1111111111111111:FF=0:TM=1442270929:LM=1442270929:V=1:S=cqgjcx1zCfeB7wRq;
43 | expires=Thu, 31-Dec-2015 16:02:17 GMT; path=/; domain=.google.com
44 | body:
45 | encoding: UTF-8
46 | string: ''
47 | http_version:
48 | recorded_at: Mon, 14 Sep 2015 22:48:49 GMT
49 | recorded_with: VCR 2.9.3
50 |
--------------------------------------------------------------------------------
/spec/fixtures/vcr_cassettes/PageRankr_Ranks_MozRank/_run/with_match/.yml:
--------------------------------------------------------------------------------
1 | ---
2 | http_interactions:
3 | - request:
4 | method: get
5 | uri: http://bagics.com/moz-rank.html?domain=www.google.com
6 | body:
7 | encoding: US-ASCII
8 | string: ''
9 | headers:
10 | User-Agent:
11 | - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/534.56.5 (KHTML,
12 | like Gecko) Version/5.1.6 Safari/534.56.5
13 | response:
14 | status:
15 | code: 200
16 | message: OK
17 | headers:
18 | Date:
19 | - Mon, 14 Sep 2015 22:49:44 GMT
20 | Content-Type:
21 | - text/html; charset=UTF-8
22 | Transfer-Encoding:
23 | - chunked
24 | Connection:
25 | - keep-alive
26 | Set-Cookie:
27 | - __cfduid=dbd3f1720b392f71e1d2339e8ddcf1ffc1442270975; expires=Tue, 13-Sep-16
28 | 22:49:35 GMT; path=/; domain=.bagics.com; HttpOnly
29 | X-Powered-By:
30 | - PHP/5.3.3
31 | Vary:
32 | - Accept-Encoding,User-Agent
33 | Cache-Control:
34 | - !binary |-
35 | 4oCccHVibGlj4oCd
36 | Server:
37 | - cloudflare-nginx
38 | Cf-Ray:
39 | - 225fa55bafa12501-ORD
40 | body:
41 | encoding: UTF-8
42 | string: "\n\n\nCheck
44 | MOZ Rank of Domain for FREE | Bagics.com | All In One SEO Companion\n\n\n\n\n\n