├── .gitignore
├── CODE_OF_CONDUCT.md
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── bin
│   ├── console
│   └── setup
├── lib
│   ├── proxycrawl.rb
│   └── proxycrawl
│       ├── api.rb
│       ├── leads_api.rb
│       ├── scraper_api.rb
│       ├── screenshots_api.rb
│       ├── storage_api.rb
│       └── version.rb
├── proxycrawl.gemspec
└── spec
    ├── api_spec.rb
    ├── screenshots_api_spec.rb
    ├── spec_helper.rb
    └── storage_api_spec.rb

/.gitignore:
--------------------------------------------------------------------------------
1 | /.bundle/
2 | /.yardoc
3 | /Gemfile.lock
4 | /_yardoc/
5 | /coverage/
6 | /doc/
7 | /pkg/
8 | /spec/reports/
9 | /tmp/
10 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, gender identity and expression, level of experience,
9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at info@proxycrawl.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at [http://contributor-covenant.org/version/1/4][version]
72 | 
73 | [homepage]: http://contributor-covenant.org
74 | [version]: http://contributor-covenant.org/version/1/4/
75 | 
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 | 
3 | gemspec
4 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2023 ProxyCrawl
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DEPRECATION NOTICE
2 | 
3 | > :warning: **IMPORTANT:** This gem is no longer maintained or supported. For the latest updates, please use our new gem at [crawlbase-ruby](https://github.com/crawlbase-source/crawlbase-ruby).
4 | 
5 | ---
6 | 
7 | # ProxyCrawl
8 | 
9 | Dependency-free gem for scraping and crawling websites using the ProxyCrawl API.
10 | 
11 | ## Installation
12 | 
13 | Add this line to your application's Gemfile:
14 | 
15 | ```ruby
16 | gem 'proxycrawl'
17 | ```
18 | 
19 | And then execute:
20 | 
21 |     $ bundle
22 | 
23 | Or install it yourself as:
24 | 
25 |     $ gem install proxycrawl
26 | 
27 | ## Crawling API Usage
28 | 
29 | Require the gem in your project:
30 | 
31 | ```ruby
32 | require 'proxycrawl'
33 | ```
34 | 
35 | Initialize the API with one of your account tokens, either the normal or the JavaScript token, then make GET or POST requests accordingly.
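The client also accepts an optional `timeout` option, the HTTP read timeout in seconds, which defaults to 120 (see `lib/proxycrawl/api.rb`). A minimal sketch:

```ruby
require 'proxycrawl'

# Lower the read timeout from the default 120 seconds to 60
api = ProxyCrawl::API.new(token: 'YOUR_TOKEN', timeout: 60)
```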
36 | 
37 | You can get a token for free by [creating a ProxyCrawl account](https://proxycrawl.com/signup), which includes 1,000 free testing requests. You can use them for TCP calls, JavaScript calls, or both.
38 | 
39 | ```ruby
40 | api = ProxyCrawl::API.new(token: 'YOUR_TOKEN')
41 | ```
42 | 
43 | ### GET requests
44 | 
45 | Pass the URL that you want to scrape plus any options from the ones available in the [API documentation](https://proxycrawl.com/dashboard/docs).
46 | 
47 | ```ruby
48 | api.get(url, options)
49 | ```
50 | 
51 | Example:
52 | 
53 | ```ruby
54 | 
55 | begin
56 |   response = api.get('https://www.facebook.com/britneyspears')
57 |   puts response.status_code
58 |   puts response.original_status
59 |   puts response.pc_status
60 |   puts response.body
61 | rescue => exception
62 |   puts exception.backtrace
63 | end
64 | 
65 | ```
66 | 
67 | You can pass any option the ProxyCrawl API supports, in the exact parameter format the API expects.
68 | 
69 | Example:
70 | 
71 | ```ruby
72 | options = {
73 |   user_agent: 'Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/30.0',
74 |   format: 'json'
75 | }
76 | 
77 | response = api.get('https://www.reddit.com/r/pics/comments/5bx4bx/thanks_obama/', options)
78 | 
79 | puts response.status_code
80 | puts response.body # read the API json response
81 | ```
82 | 
83 | ### POST requests
84 | 
85 | Pass the URL that you want to scrape, the data that you want to send (either a JSON hash or a string), plus any options from the ones available in the [API documentation](https://proxycrawl.com/dashboard/docs).
86 | 
87 | ```ruby
88 | api.post(url, data, options)
89 | ```
90 | 
91 | Example:
92 | 
93 | ```ruby
94 | api.post('https://producthunt.com/search', { text: 'example search' })
95 | ```
96 | 
97 | You can send the data as `application/json` instead of `x-www-form-urlencoded` by setting the `post_content_type` option to `json`.
98 | 
99 | ```ruby
100 | response = api.post('https://httpbin.org/post', { some_json: 'with some value' }, { post_content_type: 'json' })
101 | 
102 | puts response.status_code
103 | puts response.body
104 | 
105 | ```
106 | 
107 | ### Javascript requests
108 | 
109 | If you need to scrape websites built with JavaScript (React, Angular, Vue, etc.), you just need to pass your JavaScript token and use the same calls. Note that only `.get` is available for JavaScript requests, not `.post`.
110 | 
111 | ```ruby
112 | api = ProxyCrawl::API.new(token: 'YOUR_JAVASCRIPT_TOKEN')
113 | ```
114 | 
115 | ```ruby
116 | response = api.get('https://www.nfl.com')
117 | puts response.status_code
118 | puts response.body
119 | ```
120 | 
121 | In the same way, you can pass additional JavaScript options.
122 | 
123 | ```ruby
124 | response = api.get('https://www.freelancer.com', page_wait: 5000)
125 | puts response.status_code
126 | ```
127 | 
128 | ## Original status
129 | 
130 | You can always get the original status and the ProxyCrawl status from the response. Read the [ProxyCrawl documentation](https://proxycrawl.com/dashboard/docs) to learn more about these statuses.
131 | 
132 | ```ruby
133 | response = api.get('https://sfbay.craigslist.org/')
134 | 
135 | puts response.original_status
136 | puts response.pc_status
137 | ```
138 | 
139 | ## Scraper API usage
140 | 
141 | Initialize the Scraper API using your normal token and call the `get` method.
142 | 
143 | ```ruby
144 | scraper_api = ProxyCrawl::ScraperAPI.new(token: 'YOUR_TOKEN')
145 | ```
146 | 
147 | Pass the URL that you want to scrape plus any options from the ones available in the [Scraper API documentation](https://proxycrawl.com/docs/scraper-api/parameters).
148 | 
149 | ```ruby
150 | scraper_api.get(url, options)
151 | ```
152 | 
153 | Example:
154 | 
155 | ```ruby
156 | begin
157 |   response = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
158 |   puts response.remaining_requests
159 |   puts response.status_code
160 |   puts response.body
161 | rescue => exception
162 |   puts exception.backtrace
163 | end
164 | ```
165 | 
166 | ## Leads API usage
167 | 
168 | Initialize with your Leads API token and call the `get` method with the domain you want leads for.
169 | 
170 | For more details on the implementation, please visit the [Leads API documentation](https://proxycrawl.com/docs/leads-api).
171 | 
172 | ```ruby
173 | leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')
174 | 
175 | begin
176 |   response = leads_api.get('stripe.com')
177 |   puts response.success
178 |   puts response.remaining_requests
179 |   puts response.status_code
180 |   puts response.body
181 | rescue => exception
182 |   puts exception.backtrace
183 | end
184 | ```
185 | 
186 | If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
187 | 
188 | 
189 | ## Screenshots API usage
190 | 
191 | Initialize with your Screenshots API token and call the `get` method.
192 | 
193 | ```ruby
194 | screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
195 | 
196 | begin
197 |   response = screenshots_api.get('https://www.apple.com')
198 |   puts response.success
199 |   puts response.remaining_requests
200 |   puts response.status_code
201 |   puts response.screenshot_path # do something with screenshot_path here
202 | rescue => exception
203 |   puts exception.backtrace
204 | end
205 | ```
206 | 
207 | or using a block:
208 | 
209 | ```ruby
210 | screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
211 | 
212 | begin
213 |   response = screenshots_api.get('https://www.apple.com') do |file|
214 |     # do something (reading/writing) with the image file here
215 |   end
216 |   puts response.success
217 |   puts response.remaining_requests
218 |   puts response.status_code
219 | rescue => exception
220 |   puts exception.backtrace
221 | end
222 | ```
223 | 
224 | or specifying a file path:
225 | 
226 | ```ruby
227 | screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
228 | 
229 | begin
230 |   response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
231 |     # do something (reading/writing) with the image file here
232 |   end
233 |   puts response.success
234 |   puts response.remaining_requests
235 |   puts response.status_code
236 | rescue => exception
237 |   puts exception.backtrace
238 | end
239 | ```
240 | 
241 | Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash.
242 | 
243 | ## Storage API usage
244 | 
245 | Initialize the Storage API using your private token.
246 | 
247 | ```ruby
248 | storage_api = ProxyCrawl::StorageAPI.new(token: 'YOUR_TOKEN')
249 | ```
250 | 
251 | Pass the [URL](https://proxycrawl.com/docs/storage-api/parameters/#url) that you want to retrieve from [ProxyCrawl Storage](https://proxycrawl.com/dashboard/storage).
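Besides the URL or RID, `StorageAPI#get` takes an optional second `format` argument, `'html'` by default; passing `'json'` makes the client parse the stored response as JSON (see `lib/proxycrawl/storage_api.rb`). A minimal sketch:

```ruby
# Retrieve the stored page as JSON instead of raw HTML;
# the second argument is optional and defaults to 'html'
response = storage_api.get('https://www.apple.com', 'json')
puts response.status_code
```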
252 | 
253 | ```ruby
254 | begin
255 |   response = storage_api.get('https://www.apple.com')
256 |   puts response.original_status
257 |   puts response.pc_status
258 |   puts response.url
259 |   puts response.status_code
260 |   puts response.rid
261 |   puts response.body
262 |   puts response.stored_at
263 | rescue => exception
264 |   puts exception.backtrace
265 | end
266 | ```
267 | 
268 | or you can use the [RID](https://proxycrawl.com/docs/storage-api/parameters/#rid):
269 | 
270 | ```ruby
271 | begin
272 |   response = storage_api.get(RID)
273 |   puts response.original_status
274 |   puts response.pc_status
275 |   puts response.url
276 |   puts response.status_code
277 |   puts response.rid
278 |   puts response.body
279 |   puts response.stored_at
280 | rescue => exception
281 |   puts exception.backtrace
282 | end
283 | ```
284 | 
285 | Note: you must send either a URL or a RID; each parameter is optional on its own, but one of the two is required.
286 | 
287 | ### [Delete](https://proxycrawl.com/docs/storage-api/delete/) request
288 | 
289 | To delete an item from your storage area, pass its RID:
290 | 
291 | ```ruby
292 | if storage_api.delete(RID)
293 |   puts 'delete success'
294 | else
295 |   puts "Unable to delete: #{storage_api.body['error']}"
296 | end
297 | ```
298 | 
299 | ### [Bulk](https://proxycrawl.com/docs/storage-api/bulk/) request
300 | 
301 | To make a bulk request, pass the list of RIDs as an array:
302 | 
303 | ```ruby
304 | begin
305 |   response = storage_api.bulk([RID1, RID2, RID3, ...])
306 |   puts response.original_status
307 |   puts response.pc_status
308 |   puts response.url
309 |   puts response.status_code
310 |   puts response.rid
311 |   puts response.body
312 |   puts response.stored_at
313 | rescue => exception
314 |   puts exception.backtrace
315 | end
316 | ```
317 | 
318 | ### [RIDs](https://proxycrawl.com/docs/storage-api/rids) request
319 | 
320 | To request a list of RIDs from your storage area:
321 | 
322 | ```ruby
323 | begin
324 |   response = storage_api.rids
325 |   puts response.status_code
326 |   puts response.rid
327 |   puts response.body
328 | rescue => exception
329 |   puts exception.backtrace
330 | end
331 | ```
332 | 
333 | You can also specify a limit as a parameter:
334 | 
335 | ```ruby
336 | storage_api.rids(100)
337 | ```
338 | 
339 | ### [Total Count](https://proxycrawl.com/docs/storage-api/total_count)
340 | 
341 | To get the total number of documents in your storage area:
342 | 
343 | ```ruby
344 | total_count = storage_api.total_count
345 | puts "total_count: #{total_count}"
346 | ```
347 | 
348 | If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
349 | 
350 | ## Development
351 | 
352 | After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
353 | 
354 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
355 | 
356 | ## Contributing
357 | 
358 | Bug reports and pull requests are welcome on GitHub at https://github.com/proxycrawl/proxycrawl-ruby. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
359 | 
360 | ## License
361 | 
362 | The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
363 | 
364 | ## Code of Conduct
365 | 
366 | Everyone interacting in the ProxyCrawl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/proxycrawl/proxycrawl-ruby/blob/master/CODE_OF_CONDUCT.md).
367 | 
368 | ---
369 | 
370 | Copyright 2023 ProxyCrawl
371 | 
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler/gem_tasks"
2 | task :default => :spec
3 | 
--------------------------------------------------------------------------------
/bin/console:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | 
3 | require "bundler/setup"
4 | require "proxycrawl"
5 | 
6 | # You can add fixtures and/or initialization code here to make experimenting
7 | # with your gem easier. You can also use a different console, if you like.
8 | 
9 | # (If you use this, don't forget to add pry to your Gemfile!)
10 | # require "pry"
11 | # Pry.start
12 | 
13 | require "irb"
14 | IRB.start(__FILE__)
15 | 
--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -euo pipefail
3 | IFS=$'\n\t'
4 | set -vx
5 | 
6 | bundle install
7 | 
8 | # Do any other automated setup that you need to do here
9 | 
--------------------------------------------------------------------------------
/lib/proxycrawl.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | require 'proxycrawl/version'
4 | require 'proxycrawl/api'
5 | require 'proxycrawl/scraper_api'
6 | require 'proxycrawl/leads_api'
7 | require 'proxycrawl/screenshots_api'
8 | require 'proxycrawl/storage_api'
9 | 
10 | module ProxyCrawl
11 | end
12 | 
--------------------------------------------------------------------------------
/lib/proxycrawl/api.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | require 'net/http'
4 | require 'json'
5 | require 'uri'
6 | 
7 | module ProxyCrawl
8 |   class API
9 |     attr_reader :token, :body, :timeout, :status_code, :original_status, :pc_status, :url, :storage_url
10 | 
11 |     INVALID_TOKEN = 'Token is required'
12 |     INVALID_URL = 'URL is required'
13 | 
14 |     def initialize(options = {})
15 |       raise INVALID_TOKEN if options[:token].nil?
16 | 
17 |       @token = options[:token]
18 |       @timeout = options[:timeout] || 120
19 |     end
20 | 
21 |     def get(url, options = {})
22 |       raise INVALID_URL if url.empty?
23 | 
24 |       uri = prepare_uri(url, options)
25 |       req = Net::HTTP::Get.new(uri)
26 | 
27 |       req_options = {
28 |         read_timeout: timeout,
29 |         use_ssl: uri.scheme == 'https',
30 |         verify_mode: OpenSSL::SSL::VERIFY_NONE
31 |       }
32 | 
33 |       response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
34 | 
35 |       prepare_response(response, options[:format])
36 | 
37 |       self
38 |     end
39 | 
40 |     def post(url, data, options = {})
41 |       raise INVALID_URL if url.empty?
42 | 
43 |       uri = prepare_uri(url, options)
44 | 
45 |       http = Net::HTTP.new(uri.host, uri.port)
46 | 
47 |       http.use_ssl = true
48 | 
49 |       content_type = options[:post_content_type].to_s.include?('json') ? { 'Content-Type': 'application/json' } : nil
50 | 
51 |       request = Net::HTTP::Post.new(uri.request_uri, content_type)
52 | 
53 |       if options[:post_content_type].to_s.include?('json')
54 |         request.body = data.to_json
55 |       else
56 |         request.set_form_data(data)
57 |       end
58 | 
59 |       response = http.request(request)
60 | 
61 |       prepare_response(response, options[:format])
62 | 
63 |       self
64 |     end
65 | 
66 |     private
67 | 
68 |     def base_url
69 |       'https://api.proxycrawl.com'
70 |     end
71 | 
72 |     def prepare_uri(url, options)
73 |       uri = URI(base_url)
74 |       uri.query = URI.encode_www_form({ token: @token, url: url }.merge(options))
75 | 
76 |       uri
77 |     end
78 | 
79 |     def prepare_response(response, format)
80 |       res = format == 'json' || base_url.include?('/scraper') ? JSON.parse(response.body) : response
81 | 
82 |       @original_status = res['original_status'].to_i
83 |       @pc_status = res['pc_status'].to_i
84 |       @url = res['url']
85 |       @storage_url = res['storage_url']
86 |       @status_code = response.code.to_i
87 |       @body = response.body
88 |     end
89 |   end
90 | end
91 | 
--------------------------------------------------------------------------------
/lib/proxycrawl/leads_api.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | require 'net/http'
4 | require 'json'
5 | require 'uri'
6 | 
7 | module ProxyCrawl
8 |   class LeadsAPI
9 |     attr_reader :token, :timeout, :body, :status_code, :success, :remaining_requests
10 | 
11 |     INVALID_TOKEN = 'Token is required'
12 |     INVALID_DOMAIN = 'Domain is required'
13 | 
14 |     def initialize(options = {})
15 |       raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
16 | 
17 |       @token = options[:token]
18 |       @timeout = options[:timeout] || 120
19 |     end
20 | 
21 |     def get(domain)
22 |       raise INVALID_DOMAIN if domain.empty?
23 | 
24 |       uri = URI('https://api.proxycrawl.com/leads')
25 |       uri.query = URI.encode_www_form({ token: token, domain: domain })
26 | 
27 |       req = Net::HTTP::Get.new(uri)
28 | 
29 |       req_options = {
30 |         read_timeout: timeout,
31 |         use_ssl: uri.scheme == 'https',
32 |         verify_mode: OpenSSL::SSL::VERIFY_NONE
33 |       }
34 | 
35 |       response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
36 |       @status_code = response.code.to_i
37 |       @body = response.body
38 | 
39 |       json_body = JSON.parse(response.body)
40 |       @success = json_body['success']
41 |       @remaining_requests = json_body['remaining_requests'].to_i
42 | 
43 |       self
44 |     end
45 | 
46 |     def post
47 |       raise 'Only GET is allowed for the LeadsAPI'
48 |     end
49 |   end
50 | end
51 | 
--------------------------------------------------------------------------------
/lib/proxycrawl/scraper_api.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | module ProxyCrawl
4 |   class ScraperAPI < ProxyCrawl::API
5 |     attr_reader :remaining_requests
6 | 
7 |     def post
8 |       raise 'Only GET is allowed for the ScraperAPI'
9 |     end
10 | 
11 |     private
12 | 
13 |     def prepare_response(response, format)
14 |       super(response, format)
15 |       json_body = JSON.parse(response.body)
16 |       @remaining_requests = json_body['remaining_requests'].to_i
17 |     end
18 | 
19 |     def base_url
20 |       'https://api.proxycrawl.com/scraper'
21 |     end
22 |   end
23 | end
24 | 
--------------------------------------------------------------------------------
/lib/proxycrawl/screenshots_api.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | require 'securerandom'
4 | require 'tmpdir'
5 | 
6 | module ProxyCrawl
7 |   class ScreenshotsAPI < ProxyCrawl::API
8 |     attr_reader :screenshot_path, :success, :remaining_requests, :screenshot_url
9 | 
10 |     INVALID_SAVE_TO_PATH_FILENAME = 'Filename must end with .jpg or .jpeg'
11 |     SAVE_TO_PATH_FILENAME_PATTERN = /.+\.(jpg|JPG|jpeg|JPEG)$/.freeze
12 | 
13 |     def post
14 |       raise 'Only GET is allowed for the ScreenshotsAPI'
15 |     end
16 | 
17 |     def get(url, options = {})
18 |       screenshot_path = options.delete(:save_to_path) || generate_file_path
19 |       raise INVALID_SAVE_TO_PATH_FILENAME unless SAVE_TO_PATH_FILENAME_PATTERN =~ screenshot_path
20 | 
21 |       response = super(url, options)
22 |       file = File.open(screenshot_path, 'w+')
23 |       file.write(response.body&.force_encoding('UTF-8'))
24 |       @screenshot_path = screenshot_path
25 |       yield(file) if block_given?
26 |       response
27 |     ensure
28 |       file&.close
29 |     end
30 | 
31 |     private
32 | 
33 |     def prepare_response(response, format)
34 |       super(response, format)
35 |       @remaining_requests = response['remaining_requests'].to_i
36 |       @success = response['success'] == 'true'
37 |       @screenshot_url = response['screenshot_url']
38 |     end
39 | 
40 |     def base_url
41 |       'https://api.proxycrawl.com/screenshots'
42 |     end
43 | 
44 |     def generate_file_name
45 |       "#{SecureRandom.urlsafe_base64}.jpg"
46 |     end
47 | 
48 |     def generate_file_path
49 |       File.join(Dir.tmpdir, generate_file_name)
50 |     end
51 |   end
52 | end
53 | 
--------------------------------------------------------------------------------
/lib/proxycrawl/storage_api.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | require 'net/http'
4 | require 'json'
5 | require 'uri'
6 | 
7 | module ProxyCrawl
8 |   class StorageAPI
9 |     attr_reader :token, :timeout, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
10 | 
11 |     INVALID_TOKEN = 'Token is required'
12 |     INVALID_RID = 'RID is required'
13 |     INVALID_RID_ARRAY = 'One or more RIDs are required'
14 |     INVALID_URL_OR_RID = 'Either URL or RID is required'
15 |     BASE_URL = 'https://api.proxycrawl.com/storage'
16 | 
17 |     def initialize(options = {})
18 |       raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
19 | 
20 |       @token = options[:token]
21 |       @timeout = options[:timeout] || 120
22 |     end
23 | 
24 |     def get(url_or_rid, format = 'html')
25 |       raise INVALID_URL_OR_RID if url_or_rid.nil? || url_or_rid.empty?
26 | 
27 |       uri = URI(BASE_URL)
28 |       uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
29 | 
30 |       req = Net::HTTP::Get.new(uri)
31 | 
32 |       req_options = {
33 |         read_timeout: timeout,
34 |         use_ssl: uri.scheme == 'https',
35 |         verify_mode: OpenSSL::SSL::VERIFY_NONE
36 |       }
37 | 
38 |       response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
39 | 
40 |       res = format == 'json' ? JSON.parse(response.body) : response
41 | 
42 |       @original_status = res['original_status'].to_i
43 |       @pc_status = res['pc_status'].to_i
44 |       @url = res['url']
45 |       @rid = res['rid']
46 |       @stored_at = res['stored_at']
47 | 
48 |       @status_code = response.code.to_i
49 |       @body = response.body
50 | 
51 |       self
52 |     end
53 | 
54 |     def delete(rid)
55 |       raise INVALID_RID if rid.nil? || rid.empty?
56 | 
57 |       uri = URI(BASE_URL)
58 |       uri.query = URI.encode_www_form(token: token, rid: rid)
59 |       http = Net::HTTP.new(uri.host, uri.port).tap { |h| h.use_ssl = true } # BASE_URL is HTTPS
60 |       request = Net::HTTP::Delete.new(uri.request_uri)
61 |       response = http.request(request)
62 | 
63 |       @url, @original_status, @pc_status, @stored_at = nil
64 |       @status_code = response.code.to_i
65 |       @rid = rid
66 |       @body = JSON.parse(response.body)
67 | 
68 |       @body.key?('success')
69 |     end
70 | 
71 |     def bulk(rids_array = [])
72 |       raise INVALID_RID_ARRAY if rids_array.empty?
73 | 
74 |       uri = URI("#{BASE_URL}/bulk")
75 |       uri.query = URI.encode_www_form(token: token)
76 |       http = Net::HTTP.new(uri.host, uri.port).tap { |h| h.use_ssl = true } # BASE_URL is HTTPS
77 |       request = Net::HTTP::Post.new(uri.request_uri, { 'Content-Type': 'application/json' })
78 |       request.body = { rids: rids_array }.to_json
79 |       response = http.request(request)
80 | 
81 |       @body = JSON.parse(response.body)
82 |       @original_status = @body.map { |item| item['original_status'].to_i }
83 |       @status_code = response.code.to_i
84 |       @pc_status = @body.map { |item| item['pc_status'].to_i }
85 |       @url = @body.map { |item| item['url'] }
86 |       @rid = @body.map { |item| item['rid'] }
87 |       @stored_at = @body.map { |item| item['stored_at'] }
88 | 
89 |       self
90 |     end
91 | 
92 |     def rids(limit = -1)
93 |       uri = URI("#{BASE_URL}/rids")
94 |       query_hash = { token: token }
95 |       query_hash.merge!({ limit: limit }) if limit >= 0
96 |       uri.query = URI.encode_www_form(query_hash)
97 | 
98 |       response = Net::HTTP.get_response(uri)
99 |       @url, @original_status, @pc_status, @stored_at = nil
100 |       @status_code = response.code.to_i
101 |       @body = JSON.parse(response.body)
102 |       @rid = @body
103 | 
104 |       @body
105 |     end
106 | 
107 |     def total_count
108 |       uri = URI("#{BASE_URL}/total_count")
109 |       uri.query = URI.encode_www_form(token: token)
110 | 
111 |       response = Net::HTTP.get_response(uri)
112 |       @url, @original_status, @pc_status, @stored_at = nil
113 |       @status_code = response.code.to_i
114 |       @rid = rid
115 |       @body = JSON.parse(response.body)
116 | 
117 |       body['totalCount']
118 |     end
119 | 
120 |     private
121 | 
122 |     def decide_url_or_rid(url_or_rid)
123 |       %r{^https?://} =~ url_or_rid ? { url: url_or_rid } : { rid: url_or_rid }
124 |     end
125 |   end
126 | end
127 | 
--------------------------------------------------------------------------------
/lib/proxycrawl/version.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | module ProxyCrawl
4 |   VERSION = '1.0.2'
5 | end
6 | 
--------------------------------------------------------------------------------
/proxycrawl.gemspec:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | lib = File.expand_path("../lib", __FILE__)
3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4 | require "proxycrawl/version"
5 | 
6 | Gem::Specification.new do |spec|
7 |   spec.name          = "proxycrawl"
8 |   spec.version       = ProxyCrawl::VERSION
9 |   spec.platform      = Gem::Platform::RUBY
10 |   spec.authors       = ["proxycrawl"]
11 |   spec.email         = ["info@proxycrawl.com"]
12 |   spec.summary       = %q{ProxyCrawl API client for web scraping and crawling}
13 |   spec.description   = %q{Ruby based client for the ProxyCrawl API that helps developers crawl or scrape thousands of web pages anonymously}
14 |   spec.homepage      = "https://github.com/proxycrawl/proxycrawl-ruby"
15 |   spec.license       = "MIT"
16 | 
17 |   spec.files = `git ls-files -z`.split("\x0").reject do |f|
18 |     f.match(%r{^(test|spec|features)/})
19 |   end
20 | 
21 |   spec.required_ruby_version = '>= 2.0'
22 | 
23 |   spec.bindir        = "exe"
24 |   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25 |   spec.require_paths = ["lib"]
26 | 
27 |   spec.add_development_dependency "rspec", "~> 3.2"
28 |   spec.add_development_dependency "webmock", "~> 3.4"
29 |   spec.add_development_dependency "bundler", "~> 2.0"
30 |   spec.add_development_dependency "rake", "~> 12.3.3"
31 | 
32 |   # Deprecation warning
33 |   spec.post_install_message = <<~MESSAGE
34 |     ================================================================================
35 |     DEPRECATION WARNING - 'proxycrawl' gem
36 |     ================================================================================
37 | 
38 |     'proxycrawl' is deprecated due to rebranding. Please switch to the 'crawlbase' gem.
39 | 
40 |     More details and migration guide: https://github.com/crawlbase-source/crawlbase-ruby
41 |     ================================================================================
42 |   MESSAGE
43 | end
44 | 
--------------------------------------------------------------------------------
/spec/api_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 | require 'proxycrawl'
3 | 
4 | describe ProxyCrawl::API do
5 |   it 'raises an error if token is missing' do
6 |     expect { ProxyCrawl::API.new }.to raise_error(RuntimeError, 'Token is required')
7 |   end
8 | 
9 |   it 'sets/reads token' do
10 |     expect(ProxyCrawl::API.new(token: 'test').token).to eql('test')
11 |   end
12 | 
13 |   describe '#get' do
14 |     it 'sends a get request to ProxyCrawl API' do
15 |       stub_request(:get, 'https://api.proxycrawl.com/?token=test&url=http%3A%2F%2Fhttpbin.org%2Fanything%3Fparam1%3Dx%26params2%3Dy').
16 |         to_return(
17 |           body: 'body',
18 |           status: 200,
19 |           headers: { skip_normalize: true, 'original_status' => 200, 'pc_status' => 200, 'url' => 'http://httpbin.org/anything?param1=x&params2=y' })
20 | 
21 |       api = ProxyCrawl::API.new(token: 'test')
22 | 
23 |       response = api.get('http://httpbin.org/anything?param1=x&params2=y')
24 | 
25 |       expect(response.status_code).to eql(200)
26 |       expect(response.original_status).to eql(200)
27 |       expect(response.pc_status).to eql(200)
28 |       expect(response.url).to eql('http://httpbin.org/anything?param1=x&params2=y')
29 |       expect(response.body).to eql('body')
30 |     end
31 |   end
32 | 
33 |   describe '#post' do
34 |     it 'sends a post request to ProxyCrawl API with json data' do
35 |       stub_request(:post, 'https://api.proxycrawl.com/?post_content_type=json&token=test&url=http://httpbin.org/post').
36 |         with(body: "{\"foo\":\"bar\"}").
37 |         to_return(
38 |           body: 'body',
39 |           status: 200,
40 |           headers: { skip_normalize: true, 'original_status' => 200, 'pc_status' => 200, 'url' => 'http://httpbin.org/anything?param1=x&params2=y' })
41 | 
42 |       api = ProxyCrawl::API.new(token: 'test')
43 | 
44 |       response = api.post("http://httpbin.org/post", { foo: 'bar' }, { post_content_type: 'json' })
45 | 
46 |       expect(response.status_code).to eql(200)
47 |       expect(response.original_status).to eql(200)
48 |       expect(response.pc_status).to eql(200)
49 |       expect(response.url).to eql('http://httpbin.org/anything?param1=x&params2=y')
50 |       expect(response.body).to eql('body')
51 |     end
52 | 
53 |     it 'sends a post request to ProxyCrawl API with form data' do
54 |       stub_request(:post, 'https://api.proxycrawl.com/?token=test&url=http://httpbin.org/post').
55 |         with(body: { "foo" => "bar" }).
56 |         to_return(
57 |           body: 'body',
58 |           status: 200,
59 |           headers: { skip_normalize: true, 'original_status' => 200, 'pc_status' => 200, 'url' => 'http://httpbin.org/anything?param1=x&params2=y' })
60 | 
61 |       api = ProxyCrawl::API.new(token: 'test')
62 | 
63 |       response = api.post("http://httpbin.org/post", { foo: 'bar' })
64 | 
65 |       expect(response.status_code).to eql(200)
66 |       expect(response.original_status).to eql(200)
67 |       expect(response.pc_status).to eql(200)
68 |       expect(response.url).to eql('http://httpbin.org/anything?param1=x&params2=y')
69 |       expect(response.body).to eql('body')
70 |     end
71 |   end
72 | 
73 | end
74 | 
--------------------------------------------------------------------------------
/spec/screenshots_api_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 | require 'proxycrawl'
3 | 
4 | describe ProxyCrawl::ScreenshotsAPI do
5 |   it 'raises an error if token is missing' do
6 |     expect { ProxyCrawl::ScreenshotsAPI.new }.to raise_error(RuntimeError, 'Token is required')
7 |   end
8 | 
9 |   it 'sets/reads token' do
10 |     expect(ProxyCrawl::ScreenshotsAPI.new(token: 'test').token).to eql('test')
11 |   end
12 | 
13 |   describe '#get' do
14 |     before(:each) do
15 |       stub_request(:get, 'https://api.proxycrawl.com/screenshots?token=test&url=http%3A%2F%2Fhttpbin.org%2Fanything%3Fparam1%3Dx%26params2%3Dy').
16 |         to_return(
17 |           body: 'body',
18 |           status: 200,
19 |           headers: { skip_normalize: true, 'original_status' => 200, 'pc_status' => 200, 'url' => 'http://httpbin.org/anything?param1=x&params2=y' })
20 |     end
21 | 
22 |     it 'sends a get request to ProxyCrawl Screenshots API' do
23 |       api = ProxyCrawl::ScreenshotsAPI.new(token: 'test')
24 | 
25 |       response = api.get("http://httpbin.org/anything?param1=x&params2=y")
26 | 
27 |       expect(response.status_code).to eql(200)
28 |       expect(response.original_status).to eql(200)
29 |       expect(response.pc_status).to eql(200)
30 |       expect(response.url).to eql('http://httpbin.org/anything?param1=x&params2=y')
31 |       expect(response.body).to eql('body')
32 |       expect(response.screenshot_path).not_to be_empty
33 |     end
34 | 
35 |     it 'accepts a valid save_to_path option' do
36 |       api = ProxyCrawl::ScreenshotsAPI.new(token: 'test')
37 | 
38 |       response = api.get("http://httpbin.org/anything?param1=x&params2=y", save_to_path: save_to_path)
39 | 
40 |       expect(response.status_code).to eql(200)
41 |       expect(response.original_status).to eql(200)
42 |       expect(response.pc_status).to eql(200)
43 |       expect(response.url).to eql('http://httpbin.org/anything?param1=x&params2=y')
44 |       expect(response.body).to eql('body')
45 |       expect(response.screenshot_path).to eql(File.join(Dir.tmpdir, 'test-image.jpg'))
46 |     end
47 | 
48 |     it 'rejects an invalid save_to_path option' do
49 |       api = ProxyCrawl::ScreenshotsAPI.new(token: 'test')
50 | 
51 |       expect { api.get("http://httpbin.org/anything?param1=x&params2=y", save_to_path: '~/images/image_filename.png') }.to raise_error(RuntimeError, 'Filename must end with .jpg or .jpeg')
52 |       expect { api.get("http://httpbin.org/anything?param1=x&params2=y", save_to_path: 'image_filename.png') }.to raise_error(RuntimeError, 'Filename must end with .jpg or .jpeg')
53 |       expect { api.get("http://httpbin.org/anything?param1=x&params2=y", save_to_path: '~/images/image_filename') }.to raise_error(RuntimeError, 'Filename must end with .jpg or .jpeg')
54 |       expect { api.get("http://httpbin.org/anything?param1=x&params2=y", save_to_path: 'image_filename') }.to raise_error(RuntimeError, 'Filename must end with .jpg or .jpeg')
55 |     end
56 | 
57 |     it 'accepts a block' do
58 |       api = ProxyCrawl::ScreenshotsAPI.new(token: 'test')
59 | 
60 |       response = api.get("http://httpbin.org/anything?param1=x&params2=y", save_to_path: save_to_path) do |file|
61 |         expect(file).to be_kind_of(File)
62 |         expect(file.path).to eql(File.join(Dir.tmpdir, 'test-image.jpg'))
63 |       end
64 | 
65 |       expect(response.status_code).to eql(200)
66 |       expect(response.original_status).to eql(200)
67 |       expect(response.pc_status).to eql(200)
68 |       expect(response.url).to eql('http://httpbin.org/anything?param1=x&params2=y')
69 |       expect(response.body).to eql('body')
70 |       expect(response.screenshot_path).to eql(File.join(Dir.tmpdir, 'test-image.jpg'))
71 |     end
72 |   end
73 | 
74 |   private
75 | 
76 |   def save_to_path
77 |     File.join(Dir.tmpdir, 'test-image.jpg')
78 |   end
79 | end
80 | 
--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | require 'webmock/rspec'
3 | 
4 | WebMock.disable_net_connect!(allow_localhost: true)
5 | 
6 | # patch to optionally skip normalizing webmock response headers
7 | if defined? WebMock::Response
8 |   WebMock::Response.class_eval do
9 |     def headers=(headers)
10 |       @headers = headers
11 |       if @headers && !@headers.is_a?(Proc)
12 |         @headers =
13 |           if @headers.key?(:skip_normalize)
14 |             @headers.delete(:skip_normalize)
15 |             @headers
16 |           else
17 |             WebMock::Util::Headers.normalize_headers(@headers)
18 |           end
19 |       end
20 |     end
21 |   end
22 | end
--------------------------------------------------------------------------------
/spec/storage_api_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 | require 'proxycrawl'
3 | 
4 | describe ProxyCrawl::StorageAPI do
5 |   it 'raises an error if token is missing' do
6 |     expect { ProxyCrawl::StorageAPI.new }.to raise_error(RuntimeError, 'Token is required')
7 |   end
8 | 
9 |   context '#get' do
10 |     before(:each) do
11 |       stub_request(:get, 'https://api.proxycrawl.com/storage?format=html&rid=1&token=test')
12 |         .to_return(
13 |           status: 200,
14 |           body: {
15 |             stored_at: '2021-03-01T14:22:58+02:00',
16 |             original_status: 200,
17 |             pc_status: 200,
18 |             rid: '1',
19 |             url: 'https://www.apple.com',
20 |             body: '