├── .gitignore ├── .github └── FUNDING.yml ├── Gemfile ├── Dockerfile ├── Gemfile.lock ├── README.md ├── LICENSE └── academia-dl.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.pdf 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: ['https://ryanfb.xyz/etc/tip-jar'] 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gem 'nokogiri' 3 | gem 'open_uri_redirections' 4 | 5 | gem "addressable", "~> 2.8" 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:latest 2 | 3 | # throw errors if Gemfile has been modified since Gemfile.lock 4 | RUN bundle config --global frozen 1 5 | 6 | WORKDIR /usr/src/app 7 | 8 | COPY Gemfile Gemfile.lock ./ 9 | RUN bundle install 10 | 11 | COPY . . 
12 | 13 | VOLUME /data 14 | WORKDIR /data 15 | ENTRYPOINT ["bundle", "exec", "/usr/src/app/academia-dl.rb"] 16 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | addressable (2.8.0) 5 | public_suffix (>= 2.0.2, < 5.0) 6 | mini_portile2 (2.8.9) 7 | nokogiri (1.18.9) 8 | mini_portile2 (~> 2.8.2) 9 | racc (~> 1.4) 10 | open_uri_redirections (0.2.1) 11 | public_suffix (4.0.6) 12 | racc (1.8.1) 13 | 14 | PLATFORMS 15 | ruby 16 | 17 | DEPENDENCIES 18 | addressable (~> 2.8) 19 | nokogiri 20 | open_uri_redirections 21 | 22 | BUNDLED WITH 23 | 2.7.2 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # academia-dl 2 | 3 | Download PDFs from academia dot edu without logging in or creating an account. 4 | 5 | ## Usage 6 | 7 | Install the necessary gems with [bundler](https://bundler.io/) or `gem install nokogiri open_uri_redirections addressable`. Then run: 8 | 9 | ./academia-dl.rb "https://www.academia.edu/rest/of/url" 10 | 11 | ## Docker Usage 12 | 13 | docker run -ti -v "$(pwd)":/data ryanfb/academia-dl "https://www.academia.edu/rest/of/url" 14 | 15 | ## Tip Jar 16 | 17 | Appreciate my work? 
[Check out my Tip Jar for ways you can support it](https://ryanfb.xyz/etc/tip-jar) 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ryan Baumann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
#!/usr/bin/env ruby

# Downloads PDFs for the academia.edu page URLs given as command-line
# arguments. For each URL the page is fetched, a download id is extracted from
# an inline <script> element, and the PDF is saved in the current directory
# under the last path segment of the URL plus ".pdf".
#
# Exits with status 1 on the first URL that cannot be parsed, is not hosted on
# academia.edu, or cannot be fetched/downloaded. Files that already exist are
# skipped.

require 'nokogiri'
require 'uri'
require 'open-uri'
require 'open_uri_redirections'
require 'addressable/uri'

REFERER = 'http://scholar.google.com'.freeze
PREFIX = 'https://www.academia.edu/download'.freeze
# Passed to URI.open with `**`, which hands the callee a fresh copy, so the
# frozen constant itself is never mutated.
OPEN_URI_OPTIONS = { 'Referer' => REFERER, :allow_redirections => :all }.freeze
MAX_RETRIES = 5

ALLOWED_SCHEMES = %w[http https].freeze
ACADEMIA_DOMAIN_LABELS = %w[academia edu].freeze
DOWNLOAD_ID_MARKER = '[{"id":'.freeze
RETRY_DELAY_SECONDS = 5
MAX_BASENAME_INDEX = 250

# Normalizes and validates a user-supplied URL.
#
# @param raw_url [String] URL as given on the command line
# @return [URI::Generic, nil] the parsed, normalized URI, or nil when the URL
#   is malformed, has no host, or does not use an http/https scheme
def parse_page_uri(raw_url)
  parsed = URI(Addressable::URI.parse(raw_url).normalize.to_s)
  return nil if parsed.host.nil? || parsed.host.empty?
  # Array membership: only the exact schemes are accepted (nil is rejected).
  return nil unless ALLOWED_SCHEMES.include?(parsed.scheme)

  parsed
rescue URI::InvalidURIError, Addressable::URI::InvalidURIError
  nil
end

# Derives the local PDF filename from the last path segment of the page URI.
#
# @param page_uri [URI::Generic]
# @return [String, nil] "<segment>.pdf" (segment truncated to 251 characters),
#   or nil when the path has no usable last segment (e.g. "/")
def pdf_filename(page_uri)
  basename = page_uri.path.to_s.split('/').last
  return nil if basename.nil? || basename.empty?

  "#{basename[0..MAX_BASENAME_INDEX]}.pdf"
end

# @param host [String]
# @return [Boolean] true when the last two labels of host are "academia.edu"
def academia_host?(host)
  # `last(2)` never returns nil, so single-label hosts are simply rejected.
  host.split('.').last(2) == ACADEMIA_DOMAIN_LABELS
end

# Fetches and parses the page, retrying on HTTP errors up to MAX_RETRIES
# attempts with a pause between attempts. Exits with status 1 once the retry
# budget is exhausted.
#
# @param page_url [String] normalized URL to fetch
# @param original_url [String] URL as typed by the user, used in messages
# @return [Nokogiri::HTML::Document]
def fetch_page(page_url, original_url)
  attempts = 0
  begin
    # Block form closes the response stream once it has been parsed.
    URI.open(page_url) { |page| Nokogiri::HTML(page) }
  rescue OpenURI::HTTPError => e
    $stderr.puts e.inspect
    attempts += 1
    if attempts >= MAX_RETRIES
      abort "Max retries (= #{MAX_RETRIES}) reached, exiting after trying to open URL: #{original_url}"
    end
    sleep(RETRY_DELAY_SECONDS)
    retry
  end
end

# Extracts the download id: the text between the first DOWNLOAD_ID_MARKER and
# the following comma in the first <script> element containing the marker.
#
# @param doc [Nokogiri::HTML::Document]
# @return [String]
# @raise [RuntimeError] when no script contains the marker or no id follows it
def extract_download_id(doc)
  script = doc.css('script').find { |node| node.content.include?(DOWNLOAD_ID_MARKER) }
  raise 'Could not find a script element containing a download id' if script.nil?

  download_id = script.content.split(DOWNLOAD_ID_MARKER)[1].to_s.split(',')[0]
  raise 'Download id is missing after marker in page script' if download_id.nil? || download_id.empty?

  download_id
end

ARGV.each do |academia_url|
  page_uri = parse_page_uri(academia_url)
  filename = page_uri && pdf_filename(page_uri)
  abort "Error parsing URL: #{academia_url}" if filename.nil?

  if File.exist?(filename)
    $stderr.puts "#{filename} already exists, skipping"
    next
  end

  unless academia_host?(page_uri.host)
    abort "URL host must be 'academia.edu', error with URL: #{academia_url}"
  end

  doc = fetch_page(page_uri.to_s, academia_url)

  url = nil
  begin
    download_id = extract_download_id(doc)

    url = "#{PREFIX}/#{download_id}/#{filename}"
    $stderr.puts "Resolved download URL: #{url}"
    # Block form closes the download stream after it has been copied to disk.
    URI.open(url, **OPEN_URI_OPTIONS) { |stream| IO.copy_stream(stream, filename) }
    $stderr.puts "Downloaded #{filename}"
  rescue StandardError => e
    # `url` is still nil when the failure happened before it was resolved.
    $stderr.puts "Error parsing/downloading file for URL #{url || academia_url}: #{e.inspect}"
    $stderr.puts e.backtrace
    exit 1
  end
end