├── lib
    ├── panner
    │   ├── tumblr.rb
    │   ├── version.rb
    │   ├── panner.rb
    │   ├── package.rb
    │   ├── package_file_saver.rb
    │   ├── cli.rb
    │   ├── tumblr
    │   │   ├── feed.rb
    │   │   └── post.rb
    │   └── pans
    │   │   └── wordpress.rb
    └── panner.rb
├── Rakefile
├── Gemfile
├── bin
    ├── panner
    ├── setup
    └── console
├── .gitignore
├── panner.gemspec
├── LICENSE.txt
└── README.md


/lib/panner/tumblr.rb:
--------------------------------------------------------------------------------
1 | module Panner::Tumblr
2 | end


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler/gem_tasks"
2 | task :default => :spec
3 | 


--------------------------------------------------------------------------------
/lib/panner/version.rb:
--------------------------------------------------------------------------------
1 | module Panner
2 |   VERSION = "0.1.0"
3 | end
4 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | 
3 | # Specify your gem's dependencies in panner.gemspec
4 | gemspec
5 | 


--------------------------------------------------------------------------------
/bin/panner:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | 
3 | require "bundler/setup"
4 | require "panner"
5 | 
6 | Panner::CLI.new.run(ARGV)


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /.bundle/
 2 | /.yardoc
 3 | /Gemfile.lock
 4 | /_yardoc/
 5 | /coverage/
 6 | /doc/
 7 | /pkg/
 8 | /spec/reports/
 9 | /tmp/
10 | 


--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -euo pipefail
3 | IFS=$'\n\t'
4 | set -vx
5 | 
6 | bundle install
7 | 
8 | # Do any other automated setup that you need to do here
9 | 


--------------------------------------------------------------------------------
/bin/console:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | require "bundler/setup"
 4 | require "panner"
 5 | 
 6 | # You can add fixtures and/or initialization code here to make experimenting
 7 | # with your gem easier. You can also use a different console, if you like.
 8 | 
 9 | # (If you use this, don't forget to add pry to your Gemfile!)
10 | # require "pry"
11 | # Pry.start
12 | 
13 | require "irb"
14 | IRB.start
15 | 


--------------------------------------------------------------------------------
/lib/panner.rb:
--------------------------------------------------------------------------------
 1 | require "json"
 2 | 
 3 | require "mechanize"
 4 | require "deba"
 5 | 
 6 | module Panner
 7 |   module Pans
 8 |   end
 9 | 
10 |   # Your code goes here...
11 | end
12 | 
13 | require "panner/version"
14 | require "panner/package"
15 | require "panner/package_file_saver"
16 | require "panner/tumblr"
17 | require "panner/tumblr/post"
18 | require "panner/tumblr/feed"
19 | require "panner/pans/wordpress"
20 | require "panner/panner"
21 | require "panner/cli"
22 | 


--------------------------------------------------------------------------------
/lib/panner/panner.rb:
--------------------------------------------------------------------------------
 1 | class Panner::Panner
 2 |   def initialize(options)
 3 |     @options = options
 4 |     puts "@options: #{@options.inspect}"
 5 |   end
 6 |   
 7 |   def start
 8 |     pan = Panner::Pans::Wordpress.new(@options[:url])
 9 |     
10 |     nuggets = []
11 | 
12 |     loop do
13 |       fresh_nuggets = pan.download
14 |       break if fresh_nuggets.nil?
15 | 
16 |       nuggets.concat(fresh_nuggets)
17 |       pan.next
18 |     end
19 | 
20 |     nuggets.each do |nugget|
21 |       puts nugget.inspect
22 |       puts "======================================================================================================="
23 |     end
24 |   end
25 | end


--------------------------------------------------------------------------------
/lib/panner/package.rb:
--------------------------------------------------------------------------------
 1 | class Panner::Package
 2 |   attr_reader :root, :paths
 3 | 
 4 |   def initialize(root)
 5 |     raise "root must be an instance of Pathname" unless root.is_a?(Pathname)
 6 | 
 7 |     @root = root
 8 |     @paths = []
 9 |   end
10 | 
11 |   def agent
12 |     @agent ||= create_agent
13 |   end
14 | 
15 |   def add(path, content = nil)
16 |     file = (@root + path)
17 |     file.write(content) unless content.nil?
18 |     @paths << file
19 |   end
20 | 
21 |   def finish
22 |     add("package.json", @paths.map { |path| path.relative_path_from(@root).to_s }.to_json)
23 |   end
24 | 
25 |   def create_agent
26 |     agent = Mechanize.new
27 |     agent.pluggable_parser['image'] = Panner::PackageFileSaver.for(self)
28 |     agent
29 |   end
30 | end


--------------------------------------------------------------------------------
/lib/panner/package_file_saver.rb:
--------------------------------------------------------------------------------
 1 | class Panner::PackageFileSaver < Mechanize::Download
 2 |   def self.package
 3 |     @package
 4 |   end
 5 | 
 6 |   def initialize(*args)
 7 |     super(*args)
 8 | 
 9 |     @package = self.class.package
10 | 
11 |     if(path.to_s.bytes.length > 256)
12 |       @filename = "#{SecureRandom.hex(16)}#{Pathname.new(@filename).extname}}"
13 |     end
14 | 
15 |     if(path.to_s.bytes.length > 256)
16 |       @filename = SecureRandom.hex(16)
17 |     end
18 | 
19 |     save(path)
20 | 
21 |     @package.add(@filename)
22 |   end
23 | 
24 |   def path
25 |     @package.root + @filename
26 |   end
27 | 
28 |   def self.for(package)
29 |     Class.new self do |klass|
30 |       klass.instance_variable_set :@package, package
31 |     end
32 |   end
33 | end
34 | 


--------------------------------------------------------------------------------
/lib/panner/cli.rb:
--------------------------------------------------------------------------------
 1 | require "optparse"
 2 | 
 3 | class Panner::CLI
 4 |   def run(arguments)
 5 |     options = {}
 6 | 
 7 |     opt_parser = OptionParser.new do |opts|
 8 |       opts.banner = "Usage: panner [options] URL"
 9 | 
10 |       opts.on("-uUSERNAME", "--username=USERNAME", "Username") do |username|
11 |         options[:username] = username
12 |       end
13 |       
14 |       opts.on("-pPASSWORD", "--password=PASSWORD", "Username") do |password|
15 |         options[:password] = password
16 |       end
17 | 
18 |       opts.on("-h", "--help", "Prints this help") do
19 |         puts opts
20 |         exit
21 |       end
22 |     end
23 |     
24 |     opt_parser.parse!(arguments)
25 |     options[:url] = arguments.first
26 |     
27 |     Panner::Panner.new(options).start
28 |   end
29 | end
30 |     
31 |     


--------------------------------------------------------------------------------
/lib/panner/tumblr/feed.rb:
--------------------------------------------------------------------------------
 1 | class Panner::Tumblr::Feed
 2 |   attr_reader :posts
 3 | 
 4 |   def initialize(package, url)
 5 |     url =~ /^https?:\/\/(.*?).tumblr.com/
 6 |     @username = $1
 7 |     @index = 1
 8 | 
 9 |     @package = package
10 |   end
11 | 
12 |   def scrape
13 |     loop { break unless next_page }
14 |   end
15 | 
16 |   def next_page
17 |     new_posts = scrape_page(@package.agent.get(url))
18 |     @index += 1
19 |     new_posts.length > 0
20 |   end
21 | 
22 |   def scrape_page(page)
23 |     page.search("article[data-post-id]").map do |post|
24 |       scrape_post(post)
25 |     end
26 |   end
27 | 
28 |   def scrape_post(post_node)
29 |     post = Panner::Tumblr::Post.new(@package, post_node)
30 |     post.scrape
31 |     post
32 |   end
33 | 
34 |   def url
35 |     if @index == 1
36 |       "https://#{@username}.tumblr.com/"
37 |     else
38 |       "https://#{@username}.tumblr.com/page/#{@index}"
39 |     end
40 |   end
41 | end


--------------------------------------------------------------------------------
/lib/panner/pans/wordpress.rb:
--------------------------------------------------------------------------------
 1 | class Panner::Pans::Wordpress
 2 |   def self.eligible?(url)
 3 |     url =~ /^https?:\/\/[^\/]+\.wordpress\.com/
 4 |   end
 5 | 
 6 |   def initialize(url)
 7 |     @agent = Mechanize.new
 8 |     @next_url = url
 9 |     @page = nil
10 |   end
11 | 
12 |   def authenticate(options)
13 |   end
14 | 
15 |   def download
16 |     @page = @agent.get(@next_url)
17 |     puts "got page content"
18 | 
19 |     if @next_url.nil?
20 |       puts "no more content"
21 |       return
22 |     end
23 |     
24 |     @page.search("article.post").map do |article|
25 |       parse_article(article)
26 |     end
27 |   end
28 | 
29 |   def next
30 |     link = @page.search("div.nav-links div.nav-previous a").first
31 |     @next_url = link ? link['href'] : nil
32 |     puts "next_url: @next_url"
33 |   end
34 |   
35 |   def parse_article(article)
36 |     out = {}
37 |     out[:title] = article.at_css(".entry-title").text
38 |     out[:body] = Deba.extract(article.at_css(".entry-content").inner_html)
39 |     
40 |     out
41 |   end
42 | end


--------------------------------------------------------------------------------
/panner.gemspec:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | lib = File.expand_path('../lib', __FILE__)
 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 4 | require 'panner/version'
 5 | 
 6 | Gem::Specification.new do |spec|
 7 |   spec.name          = "panner"
 8 |   spec.version       = Panner::VERSION
 9 |   spec.authors       = ["Brenton \"B-Train\" Fletcher"]
10 |   spec.email         = ["i@bloople.net"]
11 | 
12 |   spec.summary       = %q{Panner pans for website gold.}
13 |   spec.description   = %q{Panner pans for website gold.}
14 |   spec.homepage      = "http://panner.com"
15 |   spec.license       = "MIT"
16 | 
17 |   spec.files         = `git ls-files -z`.split("\x0").reject do |f|
18 |     f.match(%r{^(test|spec|features)/})
19 |   end
20 |   spec.bindir        = "exe"
21 |   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22 |   spec.require_paths = ["lib"]
23 | 
24 |   spec.add_development_dependency "bundler", "~> 1.13"
25 |   spec.add_development_dependency "rake", "~> 10.0"
26 |   spec.add_dependency "mechanize"
27 |   spec.add_dependency "deba", "~> 0.10"
28 | end
29 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2017 Brenton "B-Train" Fletcher
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/lib/panner/tumblr/post.rb:
--------------------------------------------------------------------------------
 1 | class Panner::Tumblr::Post
 2 |   attr_reader :id, :image_urls, :html, :text
 3 | 
 4 |   def initialize(package, node)
 5 |     @package = package
 6 |     @node = node
 7 |   end
 8 | 
 9 |   def scrape
10 |     @id = @node['data-post-id']
11 | 
12 |     post_content = @node.search(".post-content").first
13 | 
14 |     @html = post_content.to_html
15 |     @text = Deba.extract(@html)
16 | 
17 |     @dependent_urls = []
18 | 
19 |     iframe = post_content.at("iframe")
20 |     scrape_images(@package.agent.get(iframe['src'])) if iframe
21 | 
22 |     scrape_images(post_content)
23 | 
24 |     scrape_dependencies
25 | 
26 |     @package.add("#{@id}.json", serialize.to_json)
27 |   end
28 | 
29 |   def serialize
30 |     {
31 |       id: @id,
32 |       html: @html,
33 |       text: @text,
34 |       dependent_urls_map: @dependent_urls_map
35 |     }
36 |   end
37 | 
38 |   def scrape_images(container)
39 |     images = container.search("img").reject { |image| image.matches?(".avatar img") }
40 |     @dependent_urls.concat(images.map { |image| image['src'] })
41 |   end
42 | 
43 |   def scrape_dependencies
44 |     @dependent_urls_map = {}
45 | 
46 |     @dependent_urls.each do |url|
47 |       begin
48 |         file_saver = @package.agent.get(url)
49 |         @dependent_urls_map[url] = file_saver.filename
50 |       rescue Exception => e
51 |         puts "#{e.message}"
52 |       end
53 |     end
54 |   end
55 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Panner
 2 | 
 3 | Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/panner`. To experiment with that code, run `bin/console` for an interactive prompt.
 4 | 
 5 | TODO: Delete this and the text above, and describe your gem
 6 | 
 7 | ## Installation
 8 | 
 9 | Add this line to your application's Gemfile:
10 | 
11 | ```ruby
12 | gem 'panner'
13 | ```
14 | 
15 | And then execute:
16 | 
17 |     $ bundle
18 | 
19 | Or install it yourself as:
20 | 
21 |     $ gem install panner
22 | 
23 | ## Usage
24 | 
25 | TODO: Write usage instructions here
26 | 
27 | ## Development
28 | 
29 | After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30 | 
31 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32 | 
33 | ## Contributing
34 | 
35 | Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/panner.
36 | 
37 | 
38 | ## License
39 | 
40 | The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
41 | 
42 | 


--------------------------------------------------------------------------------