├── .rspec
├── lib
    ├── feedjira
    │   ├── version.rb
    │   ├── parser.rb
    │   ├── util.rb
    │   ├── parser
    │   │   ├── rss_entry.rb
    │   │   ├── itunes_rss_owner.rb
    │   │   ├── rss_image.rb
    │   │   ├── atom_entry.rb
    │   │   ├── globally_unique_identifier.rb
    │   │   ├── google_docs_atom_entry.rb
    │   │   ├── rss_feed_burner_entry.rb
    │   │   ├── podlove_chapter.rb
    │   │   ├── atom_feed_burner_entry.rb
    │   │   ├── atom_google_alerts_entry.rb
    │   │   ├── rss_feed_burner.rb
    │   │   ├── atom_youtube.rb
    │   │   ├── atom_google_alerts.rb
    │   │   ├── google_docs_atom.rb
    │   │   ├── rss.rb
    │   │   ├── atom.rb
    │   │   ├── itunes_rss_category.rb
    │   │   ├── atom_youtube_entry.rb
    │   │   ├── json_feed.rb
    │   │   ├── atom_feed_burner.rb
    │   │   ├── json_feed_item.rb
    │   │   ├── itunes_rss_item.rb
    │   │   └── itunes_rss.rb
    │   ├── preprocessor.rb
    │   ├── feed.rb
    │   ├── atom_entry_utilities.rb
    │   ├── util
    │   │   └── parse_time.rb
    │   ├── rss_entry_utilities.rb
    │   ├── configuration.rb
    │   ├── feed_entry_utilities.rb
    │   └── feed_utilities.rb
    └── feedjira.rb
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── general-issue.md
    │   └── feed-parsing.md
    ├── dependabot.yml
    ├── workflows
    │   └── ruby.yml
    └── copilot-instructions.md
├── .gitignore
├── spec
    ├── support
    │   └── coverage.rb
    ├── spec_helper.rb
    ├── sample_feeds
    │   ├── AtomEscapedHTMLInPreTag.xml
    │   ├── atom_simple_single_entry.xml
    │   ├── atom_simple_single_entry_link_self.xml
    │   ├── Permalinks.xml
    │   ├── InvalidDateFormat.xml
    │   ├── FeedBurnerUrlNoAlternate.xml
    │   ├── TechCrunchFirstEntryDescription.xml
    │   ├── atom_with_link_tag_for_url_unmarked.xml
    │   ├── HREFConsideredHarmfulFirstEntry.xml
    │   ├── a10.xml
    │   ├── AmazonWebServicesBlogFirstEntryContent.xml
    │   ├── ITunesWithSpacesInAttributes.xml
    │   ├── AtomFeedWithSpacesAroundEquals.xml
    │   ├── ITunesWithSingleQuotedAttributes.xml
    │   ├── TechCrunchFirstEntry.xml
    │   ├── FeedjiraBlog.xml
    │   ├── itunes.xml
    │   ├── PaulDixExplainsNothingFirstEntryContent.xml
    │   ├── TenderLovemakingFirstEntry.xml
    │   ├── itunes_feedburner.xml
    │   └── GoogleDocsList.xml
    ├── feedjira
    │   ├── configuration_spec.rb
    │   ├── parser
    │   │   ├── i_tunes_rss_owner_spec.rb
    │   │   ├── google_docs_atom_entry_spec.rb
    │   │   ├── i_tunes_rss_category_spec.rb
    │   │   ├── atom_youtube_spec.rb
    │   │   ├── podlove_chapter_spec.rb
    │   │   ├── atom_google_alerts_entry_spec.rb
    │   │   ├── atom_google_alerts_spec.rb
    │   │   ├── google_docs_atom_spec.rb
    │   │   ├── json_feed_spec.rb
    │   │   ├── rss_feed_burner_spec.rb
    │   │   ├── atom_feed_burner_entry_spec.rb
    │   │   ├── rss_spec.rb
    │   │   ├── i_tunes_rss_item_spec.rb
    │   │   ├── atom_youtube_entry_spec.rb
    │   │   ├── atom_entry_spec.rb
    │   │   ├── rss_feed_burner_entry_spec.rb
    │   │   ├── itunes_rss_spec.rb
    │   │   ├── atom_feed_burner_spec.rb
    │   │   ├── json_feed_item_spec.rb
    │   │   ├── rss_entry_spec.rb
    │   │   └── atom_spec.rb
    │   ├── preprocessor_spec.rb
    │   ├── atom_entry_utilities_spec.rb
    │   ├── util
    │   │   └── parse_time_spec.rb
    │   ├── feed_spec.rb
    │   └── feed_utilities_entry_spec.rb
    ├── sample_feeds.rb
    └── feedjira_spec.rb
├── Rakefile
├── Gemfile
├── feedjira.gemspec
├── LICENSE
├── .rubocop.yml
├── CODE_OF_CONDUCT.md
└── README.md


/.rspec:
--------------------------------------------------------------------------------
1 | --color


--------------------------------------------------------------------------------
/lib/feedjira/version.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | module Feedjira
4 |   VERSION = "4.0.1"
5 | end
6 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | module Feedjira
4 |   module Parser
5 |   end
6 | end
7 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/general-issue.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feed Parsing
3 | about: Anything else
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---


--------------------------------------------------------------------------------
/lib/feedjira/util.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | module Feedjira
4 |   # Utility modules and helper functions
5 |   module Util
6 |   end
7 | end
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .bundle
 2 | .projections.json
 3 | .ruby-gemset
 4 | .ruby-version
 5 | Gemfile.lock
 6 | doc/
 7 | .yardoc/
 8 | pkg/
 9 | rdoc/
10 | coverage/
11 | vendor/bundle/
12 | 


--------------------------------------------------------------------------------
/spec/support/coverage.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "simplecov"
 4 | 
 5 | SimpleCov.start do
 6 |   enable_coverage :branch
 7 |   add_filter "_spec.rb"
 8 | end
 9 | 
10 | SimpleCov.minimum_coverage(line: 100, branch: 100)
11 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/rss_entry.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with RDF feed entries.
 6 |     class RSSEntry
 7 |       include SAXMachine
 8 |       include FeedEntryUtilities
 9 |       include RSSEntryUtilities
10 |     end
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/itunes_rss_owner.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     class ITunesRSSOwner
 6 |       include SAXMachine
 7 |       include FeedUtilities
 8 | 
 9 |       element :"itunes:name", as: :name
10 |       element :"itunes:email", as: :email
11 |     end
12 |   end
13 | end
14 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | # coverage setup must come before loading lib/ code
 4 | require "support/coverage"
 5 | 
 6 | require File.expand_path("#{File.dirname(__FILE__)}/../lib/feedjira")
 7 | require "sample_feeds"
 8 | 
 9 | SAXMachine.handler = ENV["HANDLER"].to_sym if ENV["HANDLER"]
10 | 
11 | RSpec.configure do |c|
12 |   c.include SampleFeeds
13 | end
14 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/rss_image.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with RSS images
 6 |     class RSSImage
 7 |       include SAXMachine
 8 | 
 9 |       element :description
10 |       element :height
11 |       element :link
12 |       element :title
13 |       element :url
14 |       element :width
15 |     end
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/atom_entry.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with Atom feed entries.
 6 |     class AtomEntry
 7 |       include SAXMachine
 8 |       include FeedEntryUtilities
 9 |       include AtomEntryUtilities
10 | 
11 |       element :"media:thumbnail", as: :image, value: :url
12 |       element :"media:content", as: :image, value: :url
13 |     end
14 |   end
15 | end
16 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <feed xmlns="http://www.w3.org/2005/Atom">
 3 |   <title type="html">Test feed</title>
 4 |   <entry>
 5 |     <title type="text">Test entry</title>
 6 |     <content type="xhtml">
 7 |       <div xmlns="http://www.w3.org/1999/xhtml">
 8 |         <p>This is escaped html:</p>
 9 |         <pre>&lt;b&gt;test&lt;b&gt;</pre>
10 |       </div>
11 |     </content>
12 |   </entry>
13 | </feed>
14 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/globally_unique_identifier.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     class GloballyUniqueIdentifier
 6 |       include SAXMachine
 7 | 
 8 |       attribute :isPermaLink, as: :is_perma_link
 9 | 
10 |       value :guid
11 | 
12 |       def perma_link?
13 |         is_perma_link != "false"
14 |       end
15 | 
16 |       def url
17 |         perma_link? ? guid : nil
18 |       end
19 |     end
20 |   end
21 | end
22 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/google_docs_atom_entry.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     class GoogleDocsAtomEntry
 6 |       include SAXMachine
 7 |       include FeedEntryUtilities
 8 |       include AtomEntryUtilities
 9 | 
10 |       element :"docs:md5Checksum", as: :checksum
11 |       element :"docs:filename", as: :original_filename
12 |       element :"docs:suggestedFilename", as: :suggested_filename
13 |     end
14 |   end
15 | end
16 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feed-parsing.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feed Parsing
 3 | about: Your feed is parsing incorrectly, or you have a feed type that is not supported
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | ### Steps to reproduce
11 | <!-- (Guidelines for creating a bug report are [available
12 | here](https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#creating-a-bug-report)) -->
13 | 
14 | ### Example feed URL
15 | <!-- Tell us what should happen -->
16 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "bundler/gem_tasks"
 4 | require "rspec/core/rake_task"
 5 | require "rubocop/rake_task"
 6 | require "yard"
 7 | 
 8 | RSpec::Core::RakeTask.new(:spec) do |t|
 9 |   t.verbose = false
10 | end
11 | 
12 | RuboCop::RakeTask.new(:rubocop) do |t|
13 |   t.options = ["--display-cop-names"]
14 | end
15 | 
16 | YARD::Rake::YardocTask.new do |t|
17 |   t.files   = ["lib/**/*.rb", "-", "LICENSE"]
18 | end
19 | 
20 | task default: %i[spec rubocop]
21 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/rss_feed_burner_entry.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with RDF feed entries.
 6 |     class RSSFeedBurnerEntry
 7 |       include SAXMachine
 8 |       include FeedEntryUtilities
 9 |       include RSSEntryUtilities
10 | 
11 |       element :"feedburner:origLink", as: :orig_link
12 |       private :orig_link
13 | 
14 |       def url
15 |         orig_link || super
16 |       end
17 |     end
18 |   end
19 | end
20 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | source "https://rubygems.org/"
 4 | 
 5 | gemspec
 6 | 
 7 | gem "faraday", "~> 2.14.0"
 8 | gem "pry", "~> 0.15.0"
 9 | gem "rspec", "~> 3.13.0"
10 | gem "rubocop", "~> 1.81.1"
11 | gem "rubocop-performance", "~> 1.26.0"
12 | gem "rubocop-rake", "~> 0.7.1"
13 | gem "rubocop-rspec", "~> 3.7.0"
14 | gem "simplecov", "~> 0.22.0"
15 | gem "yard", "~> 0.9.34"
16 | 
17 | group :test do
18 |   gem "oga", "~> 3.4"
19 |   gem "ox", "~> 2.14.17", platforms: %i[mri rbx]
20 |   gem "rake", "~> 13.3.0"
21 | end
22 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/podlove_chapter.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     class PodloveChapter
 6 |       include SAXMachine
 7 |       include FeedEntryUtilities
 8 | 
 9 |       attribute :start, as: :start_ntp
10 |       attribute :title
11 |       attribute :href, as: :url
12 |       attribute :image
13 | 
14 |       def start
15 |         return unless start_ntp
16 | 
17 |         parts = start_ntp.split(":")
18 |         parts.reverse.to_enum.with_index.sum do |part, index|
19 |           part.to_f * (60**index)
20 |         end
21 |       end
22 |     end
23 |   end
24 | end
25 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/atom_feed_burner_entry.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with Feedburner Atom feed entries.
 6 |     class AtomFeedBurnerEntry
 7 |       include SAXMachine
 8 |       include FeedEntryUtilities
 9 |       include AtomEntryUtilities
10 | 
11 |       element :"feedburner:origLink", as: :orig_link
12 |       private :orig_link
13 | 
14 |       element :"media:thumbnail", as: :image, value: :url
15 |       element :"media:content", as: :image, value: :url
16 | 
17 |       def url
18 |         orig_link || super
19 |       end
20 |     end
21 |   end
22 | end
23 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/atom_google_alerts_entry.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "uri"
 4 | 
 5 | module Feedjira
 6 |   module Parser
 7 |     # Parser for dealing with Feedburner Atom feed entries.
 8 |     class AtomGoogleAlertsEntry
 9 |       include SAXMachine
10 |       include FeedEntryUtilities
11 |       include AtomEntryUtilities
12 | 
13 |       def url
14 |         url = super
15 |         return unless url&.start_with?("https://www.google.com/url?")
16 | 
17 |         uri = URI(url)
18 |         cons = URI.decode_www_form(uri.query).assoc("url")
19 |         cons && cons[1]
20 |       end
21 |     end
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/spec/feedjira/configuration_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Configuration do
 6 |   describe ".configure" do
 7 |     it "sets strip_whitespace config" do
 8 |       Feedjira.configure { |config| config.strip_whitespace = true }
 9 |       expect(Feedjira.strip_whitespace).to be true
10 |     end
11 | 
12 |     it "allows parsers to be modified" do
13 |       custom_parser = Class.new
14 | 
15 |       Feedjira.configure { |config| config.parsers.unshift(custom_parser) }
16 |       expect(Feedjira.parsers.first).to eq(custom_parser)
17 |       Feedjira.reset_configuration!
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/atom_simple_single_entry.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <feed xmlns="http://www.w3.org/2005/Atom">
 3 |   <title>Example Feed</title>
 4 |   <link href="http://example.org/"/>
 5 |   <updated>2003-12-13T18:30:02Z</updated>
 6 |   <author>
 7 |     <name>John Doe</name>
 8 |   </author>
 9 |   <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
10 |   <entry>
11 |     <title>Atom-Powered Robots Run Amok</title>
12 |     <link href="http://example.org/2003/12/13/atom03"/>
13 |     <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
14 |     <updated>2003-12-13T18:30:02Z</updated>
15 |     <summary>Some text.</summary>
16 |   </entry>
17 | </feed>
18 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "bundler" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "weekly"
12 |     assignees:
13 |       - "mockdeep"
14 |     groups:
15 |       all:
16 |         patterns:
17 |           - "*"
18 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/atom_simple_single_entry_link_self.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <feed xmlns="http://www.w3.org/2005/Atom">
 3 |   <title>Example Feed</title>
 4 |   <link href="http://example.org/atom.xml" rel="self"/>
 5 |   <updated>2003-12-13T18:30:02Z</updated>
 6 |   <author>
 7 |     <name>John Doe</name>
 8 |   </author>
 9 |   <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
10 |   <entry>
11 |     <title>Atom-Powered Robots Run Amok</title>
12 |     <link href="http://example.org/2003/12/13/atom03"/>
13 |     <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
14 |     <updated>2003-12-13T18:30:02Z</updated>
15 |     <summary>Some text.</summary>
16 |   </entry>
17 | </feed>
18 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/i_tunes_rss_owner_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Parser::ITunesRSSOwner do
 6 |   before do
 7 |     # I don't really like doing it this way because these unit test should only
 8 |     # rely on RSSEntry, but this is actually how it should work. You would
 9 |     # never just pass entry xml straight to the ITunesRssOwner
10 |     feed = Feedjira::Parser::ITunesRSS.parse sample_itunes_feed
11 |     @owner = feed.itunes_owners.first
12 |   end
13 | 
14 |   it "parses the name" do
15 |     expect(@owner.name).to eq "John Doe"
16 |   end
17 | 
18 |   it "parses the email" do
19 |     expect(@owner.email).to eq "john.doe@example.com"
20 |   end
21 | end
22 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/Permalinks.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <rss version="2.0">
 3 |     <channel>
 4 |         <item>
 5 |             <title>Item 1</title>
 6 |             <guid isPermaLink="false">http://example.com/1</guid>
 7 |         </item>
 8 |         <item>
 9 |             <title>Item 2</title>
10 |             <guid isPermaLink="true">http://example.com/2</guid>
11 |         </item>
12 |         <item>
13 |             <title>Item 3</title>
14 |             <guid>http://example.com/3</guid>
15 |         </item>
16 |         <item>
17 |             <title>Item 4</title>
18 |             <link>http://example.com/4</link>
19 |             <guid>http://example.com/5</guid>
20 |         </item>
21 |     </channel>
22 | </rss>


--------------------------------------------------------------------------------
/lib/feedjira/parser/rss_feed_burner.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with RSS feeds.
 6 |     class RSSFeedBurner
 7 |       include SAXMachine
 8 |       include FeedUtilities
 9 | 
10 |       element :title
11 |       element :description
12 |       element :link, as: :url
13 |       element :lastBuildDate, as: :last_built
14 |       elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
15 |       elements :item, as: :entries, class: RSSFeedBurnerEntry
16 | 
17 |       attr_accessor :feed_url
18 | 
19 |       def self.able_to_parse?(xml) # :nodoc:
20 |         (/<rss|<rdf/ =~ xml) && xml.include?("feedburner")
21 |       end
22 |     end
23 |   end
24 | end
25 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/InvalidDateFormat.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
 2 |   xmlns:dc="http://purl.org/dc/elements/1.1/">
 3 | 
 4 | <channel>
 5 |   <title>Invalid date format feed</title>
 6 |   <link>http://example.com/feed</link>
 7 |   <language>en-US</language>
 8 |   <item>
 9 |     <title>Item 0 with an invalid date</title>
10 |     <link>http://example.com/item0</link>
11 |     <pubDate>Mon, 16 Oct 2017 15:10:00 +0000</pubDate>
12 |     <dc:date>1518478934</dc:date>
13 |   </item>
14 |   <item>
15 |     <title>Item 1 with all valid dates</title>
16 |     <link>http://example.com/item1</link>
17 |     <pubDate>Tue, 17 Oct 2017 12:17:00 +0000</pubDate>
18 |     <dc:date>Tue, 17 Oct 2017 22:17:00 +0000</dc:date>
19 |   </item>
20 | </channel>
21 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/atom_youtube.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with RSS feeds.
 6 |     class AtomYoutube
 7 |       include SAXMachine
 8 |       include FeedUtilities
 9 | 
10 |       element :title
11 |       element :link, as: :url, value: :href, with: { rel: "alternate" }
12 |       element :link, as: :feed_url, value: :href, with: { rel: "self" }
13 |       element :name, as: :author
14 |       element :"yt:channelId", as: :youtube_channel_id
15 | 
16 |       elements :entry, as: :entries, class: AtomYoutubeEntry
17 | 
18 |       def self.able_to_parse?(xml) # :nodoc:
19 |         xml.include?("xmlns:yt=\"http://www.youtube.com/xml/schemas/2015\"")
20 |       end
21 |     end
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/feedjira/preprocessor.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   class Preprocessor
 5 |     def initialize(xml)
 6 |       @xml = xml
 7 |     end
 8 | 
 9 |     def to_xml
10 |       process_content
11 |       doc.to_xml
12 |     end
13 | 
14 |     private
15 | 
16 |     def process_content
17 |       content_nodes.each do |node|
18 |         node.content = raw_html(node)
19 |       end
20 |     end
21 | 
22 |     def content_nodes
23 |       doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
24 |     end
25 | 
26 |     def raw_html(node)
27 |       node.search("./div").inner_html
28 |     end
29 | 
30 |     def doc
31 |       @doc ||= Nokogiri::XML(@xml).remove_namespaces!
32 |     end
33 |   end
34 | end
35 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/google_docs_atom_entry_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Parser::GoogleDocsAtomEntry do
 6 |   describe "parsing" do
 7 |     before do
 8 |       xml = sample_google_docs_list_feed
 9 |       @feed = Feedjira::Parser::GoogleDocsAtom.parse xml
10 |       @entry = @feed.entries.first
11 |     end
12 | 
13 |     it "has the custom checksum element" do
14 |       expect(@entry.checksum).to eq "2b01142f7481c7b056c4b410d28f33cf"
15 |     end
16 | 
17 |     it "has the custom filename element" do
18 |       expect(@entry.original_filename).to eq "MyFile.pdf"
19 |     end
20 | 
21 |     it "has the custom suggested filename element" do
22 |       expect(@entry.suggested_filename).to eq "TaxDocument.pdf"
23 |     end
24 |   end
25 | end
26 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/atom_google_alerts.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with Feedburner Atom feeds.
 6 |     class AtomGoogleAlerts
 7 |       include SAXMachine
 8 |       include FeedUtilities
 9 | 
10 |       element :title
11 |       element :subtitle, as: :description
12 |       element :link, as: :feed_url, value: :href, with: { rel: "self" }
13 |       element :link, as: :url, value: :href, with: { rel: "self" }
14 |       elements :link, as: :links, value: :href
15 |       elements :entry, as: :entries, class: AtomGoogleAlertsEntry
16 | 
17 |       def self.able_to_parse?(xml)
18 |         Atom.able_to_parse?(xml) && (%r{<id>tag:google\.com,2005:[^<]+/com\.google/alerts/} === xml) # rubocop:disable Style/CaseEquality
19 |       end
20 | 
21 |       def self.preprocess(xml)
22 |         Preprocessor.new(xml).to_xml
23 |       end
24 |     end
25 |   end
26 | end
27 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/google_docs_atom.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require File.expand_path("./atom", File.dirname(__FILE__))
 4 | module Feedjira
 5 |   module Parser
 6 |     class GoogleDocsAtom
 7 |       include SAXMachine
 8 |       include FeedUtilities
 9 | 
10 |       element :title
11 |       element :subtitle, as: :description
12 |       element :link, as: :url, value: :href, with: { type: "text/html" }
13 |       element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" }
14 |       elements :link, as: :links, value: :href
15 |       elements :entry, as: :entries, class: GoogleDocsAtomEntry
16 | 
17 |       def url
18 |         @url ||= links.first
19 |       end
20 | 
21 |       def self.able_to_parse?(xml) # :nodoc:
22 |         %r{<id>https?://docs\.google\.com/.*</id>} =~ xml
23 |       end
24 | 
25 |       def feed_url
26 |         @feed_url ||= links.first
27 |       end
28 |     end
29 |   end
30 | end
31 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/rss.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with RSS feeds.
 6 |     # Source: https://cyber.harvard.edu/rss/rss.html
 7 |     class RSS
 8 |       include SAXMachine
 9 |       include FeedUtilities
10 | 
11 |       element :description
12 |       element :image, class: RSSImage
13 |       element :language
14 |       element :lastBuildDate, as: :last_built
15 |       element :link, as: :url
16 |       element :"a10:link", as: :url, value: :href
17 |       element :rss, as: :version, value: :version
18 |       element :title
19 |       element :ttl
20 |       elements :"atom:link", as: :hubs, value: :href, with: { rel: "hub" }
21 |       elements :item, as: :entries, class: RSSEntry
22 | 
23 |       attr_accessor :feed_url
24 | 
25 |       def self.able_to_parse?(xml)
26 |         (/<rss|<rdf/ =~ xml) && !xml.include?("feedburner")
27 |       end
28 |     end
29 |   end
30 | end
31 | 


--------------------------------------------------------------------------------
/.github/workflows/ruby.yml:
--------------------------------------------------------------------------------
 1 | name: Ruby
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   test:
11 |     strategy:
12 |       fail-fast: false
13 |       matrix:
14 |         os: [ubuntu]
15 |         ruby: [3.1, 3.2, 3.3, 3.4]
16 |         handler: [nokogiri, ox, oga]
17 |         exclude:
18 |           - { ruby: jruby, handler: ox }
19 | 
20 |     name: >-
21 |       ${{matrix.os}}-ruby-${{matrix.ruby}}-${{matrix.handler}}
22 |     runs-on: ${{matrix.os}}-latest
23 |     continue-on-error: ${{matrix.ruby == 'head' || matrix.ruby == 'jruby'}}
24 |     env:
25 |       HANDLER: ${{matrix.handler}}
26 | 
27 |     steps:
28 |       - name: Check out
29 |         uses: actions/checkout@v2
30 | 
31 |       - name: Set up ruby and bundle
32 |         uses: ruby/setup-ruby@v1
33 |         with:
34 |           ruby-version: ${{matrix.ruby}}
35 |           bundler-cache: true
36 | 
37 |       - name: Run rake
38 |         run: |
39 |           bundle exec rake
40 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/atom.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with Atom feeds.
 6 |     class Atom
 7 |       include SAXMachine
 8 |       include FeedUtilities
 9 | 
10 |       element :title
11 |       element :subtitle, as: :description
12 |       element :link, as: :url, value: :href, with: { type: "text/html" }
13 |       element :link, as: :feed_url, value: :href, with: { rel: "self" }
14 |       elements :link, as: :links, value: :href
15 |       elements :link, as: :hubs, value: :href, with: { rel: "hub" }
16 |       elements :entry, as: :entries, class: AtomEntry
17 |       element :icon
18 | 
19 |       def self.able_to_parse?(xml)
20 |         %r{<feed[^>]+xmlns\s?=\s?["'](https?://www\.w3\.org/2005/Atom|http://purl\.org/atom/ns\#)["'][^>]*>} =~ xml
21 |       end
22 | 
23 |       def url
24 |         @url || (links - [feed_url]).last
25 |       end
26 | 
27 |       def self.preprocess(xml)
28 |         Preprocessor.new(xml).to_xml
29 |       end
30 |     end
31 |   end
32 | end
33 | 


--------------------------------------------------------------------------------
/feedjira.gemspec:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require File.expand_path("lib/feedjira/version", __dir__)
 4 | 
 5 | Gem::Specification.new do |s|
 6 |   s.authors = [
 7 |     "Adam Hess",
 8 |     "Akinori Musha",
 9 |     "Ezekiel Templin",
10 |     "Jon Allured",
11 |     "Julien Kirch",
12 |     "Michael Stock",
13 |     "Paul Dix"
14 |   ]
15 |   s.homepage = "https://github.com/feedjira/feedjira"
16 |   s.license  = "MIT"
17 |   s.name     = "feedjira"
18 |   s.platform = Gem::Platform::RUBY
19 |   s.summary  = "A feed parsing library"
20 |   s.version  = Feedjira::VERSION
21 | 
22 |   s.metadata = {
23 |     "homepage_uri" => "https://github.com/feedjira/feedjira",
24 |     "source_code_uri" => "https://github.com/feedjira/feedjira",
25 |     "changelog_uri" => "https://github.com/feedjira/feedjira/blob/main/CHANGELOG.md",
26 |     "rubygems_mfa_required" => "true"
27 |   }
28 | 
29 |   s.files         = `git ls-files`.split("\n")
30 |   s.require_paths = ["lib"]
31 | 
32 |   s.required_ruby_version = ">=3.1"
33 | 
34 |   s.add_dependency "logger", ">= 1.0", "< 2"
35 |   s.add_dependency "loofah", ">= 2.3.1", "< 3"
36 |   s.add_dependency "sax-machine", ">= 1.0", "< 2"
37 | end
38 | 


--------------------------------------------------------------------------------
/spec/feedjira/preprocessor_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Preprocessor do
 6 |   it "returns the xml as parsed by Nokogiri" do
 7 |     xml = "<xml></xml>"
 8 |     doc = Nokogiri::XML(xml).remove_namespaces!
 9 |     processor = described_class.new xml
10 |     escaped = processor.to_xml
11 | 
12 |     expect(escaped).to eq doc.to_xml
13 |   end
14 | 
15 |   it "escapes markup in xhtml content" do
16 |     processor = described_class.new sample_atom_xhtml_feed
17 |     escaped = processor.to_xml
18 |     escaped_parts = escaped.split "\n"
19 | 
20 |     expect(escaped_parts[10]).to match(%r{&lt;i&gt;dogs&lt;/i&gt;}) # title
21 |     expect(escaped_parts[16]).to match(%r{&lt;b&gt;XHTML&lt;/b&gt;}) # summary
22 |     expect(escaped_parts[26]).to match(/&lt;p&gt;$/) # content
23 |   end
24 | 
25 |   it "leaves escaped html within pre tag" do
26 |     processor = described_class.new(sample_atom_xhtml_with_escpaed_html_in_pre_tag_feed)
27 |     escaped = processor.to_xml
28 |     expected_pre_tag = "        &lt;pre&gt;&amp;lt;b&amp;gt;test&amp;lt;b&amp;gt;&lt;/pre&gt;"
29 |     expect(escaped.split("\n")[7]).to eq(expected_pre_tag)
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/FeedBurnerUrlNoAlternate.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | <?xml version="1.0" encoding="UTF-8"?>
 3 | <?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?>
 4 | <feed xmlns="http://www.w3.org/2005/Atom">
 5 | 
 6 |   <title>QQQQ</title>
 7 | 
 8 |   <link href="http://example.com/"/>
 9 |   <updated>2010-09-18T10:02:20-07:00</updated>
10 |   <id>QQQQ</id>
11 |   <author>
12 |     <name>QQQQ</name>
13 |     <email>QQQQ@example.com</email>
14 |   </author>
15 | 
16 | 
17 |   <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml"
18 |                href="http://feeds.feedburner.com/QQQQ"/>
19 |   <feedburner:info xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" uri="QQQQ"/>
20 |   <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/"/>
21 |   <entry>
22 |     <title>QQQQ</title>
23 |     <link href="http://example.com/QQQQ.html"/>
24 |     <updated>2010-08-11T00:00:00-07:00</updated>
25 |     <id>http://example.com/QQQQ.html</id>
26 |     <content type="html">QQQQ</content>
27 |   </entry>
28 | </feed>
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2009-2016:
 4 | 
 5 | - Paul Dix
 6 | - Julien Kirch
 7 | - Ezekiel Templin
 8 | - Jon Allured
 9 | 
10 | Permission is hereby granted, free of charge, to any person obtaining a copy
11 | of this software and associated documentation files (the "Software"), to deal
12 | in the Software without restriction, including without limitation the rights
13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 | copies of the Software, and to permit persons to whom the Software is
15 | furnished to do so, subject to the following conditions:
16 | 
17 | The above copyright notice and this permission notice shall be included in all
18 | copies or substantial portions of the Software.
19 | 
20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 | SOFTWARE.
27 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/itunes_rss_category.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # iTunes extensions to the standard RSS2.0 item
 6 |     # Source: https://help.apple.com/itc/podcasts_connect/#/itcb54353390
 7 |     class ITunesRSSCategory
 8 |       include SAXMachine
 9 | 
10 |       attribute :text
11 | 
12 |       elements :"itunes:category", as: :itunes_categories,
13 |                                    class: ITunesRSSCategory
14 | 
15 |       def each_subcategory(&block)
16 |         return to_enum(__method__) unless block
17 | 
18 |         yield text
19 | 
20 |         itunes_categories.each do |itunes_category|
21 |           itunes_category.each_subcategory(&block)
22 |         end
23 |       end
24 | 
25 |       def each_path(ancestors = [], &block)
26 |         return to_enum(__method__, ancestors) unless block
27 | 
28 |         category_hierarchy = ancestors + [text]
29 | 
30 |         if itunes_categories.empty?
31 |           yield category_hierarchy
32 |         else
33 |           itunes_categories.each do |itunes_category|
34 |             itunes_category.each_path(category_hierarchy, &block)
35 |           end
36 |         end
37 |       end
38 |     end
39 |   end
40 | end
41 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/i_tunes_rss_category_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Parser::ITunesRSSCategory do
 6 |   describe "#each_subcategory" do
 7 |     it "returns an enumerator when no block is given" do
 8 |       category = described_class.new
 9 |       category.text = "Technology"
10 | 
11 |       result = category.each_subcategory
12 |       expect(result).to be_an(Enumerator)
13 |     end
14 | 
15 |     it "yields category text and subcategories when block is given" do
16 |       parent_category = described_class.new
17 |       parent_category.text = "Technology"
18 | 
19 |       subcategory = described_class.new
20 |       subcategory.text = "Gadgets"
21 | 
22 |       parent_category.itunes_categories = [subcategory]
23 | 
24 |       yielded_categories = []
25 |       parent_category.each_subcategory { |cat| yielded_categories << cat }
26 | 
27 |       expect(yielded_categories).to eq %w[Technology Gadgets]
28 |     end
29 |   end
30 | 
31 |   describe "#each_path" do
32 |     it "returns an enumerator when no block is given" do
33 |       category = described_class.new
34 |       category.text = "Technology"
35 | 
36 |       result = category.each_path
37 |       expect(result).to be_an(Enumerator)
38 |     end
39 |   end
40 | end
41 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/TechCrunchFirstEntryDescription.xml:
--------------------------------------------------------------------------------
1 | <img width="100" height="62" src="http://tctechcrunch2011.files.wordpress.com/2011/11/angies-list.jpeg?w=100&amp;h=62&amp;crop=1" class="attachment-tc-carousel-river-thumb wp-post-image" alt="angies-list" title="angies-list" style="float: left; margin: 0 10px 7px 0;" />Angie's List, which offers consumers a way to review and rate doctors, contractors and service companies on the Web, has just set the terms for its IPO. In a <a href="http://www.sec.gov/Archives/edgar/data/1491778/000119312511292292/d222159ds1a.htm">new filing</a>, the company revealed that it aims to raise as much as $131.4 million in the offering and has priced its IPO in the range of $11 to $13 per share. The company will <a href="http://techcrunch.com/2011/10/31/pre-ipo-angies-list-is-the-latest-tech-company-to-list-on-the-nasdaq/">list on the Nasdaq</a> under the symbol “ANGI.” At the high end of the range, Angie's List would be valued at nearly $700 million. 
2 | 
3 | Angie’s List launched in 1995 with a focus on local home, yard and car services, sits at the intersection of local search, user-generated content and subscription-based services. To date, Angie’s List has raised nearly $100 million from Battery Ventures, T. Rowe Price, City Investment Group, Cardinal Ventures and others.
4 | 


--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
 1 | inherit_from: .rubocop_todo.yml
 2 | 
 3 | plugins:
 4 |   - rubocop-rake
 5 |   - rubocop-rspec
 6 |   - rubocop-performance
 7 | 
 8 | AllCops:
 9 |   EnabledByDefault: true
10 |   TargetRubyVersion: 3.1
11 | 
12 | # Offense count: 3
13 | # Configuration parameters: IgnoredMethods.
14 | Metrics/AbcSize:
15 |   Max: 24
16 | 
17 | # Offense count: 33
18 | # Configuration parameters: CountComments, CountAsOne, ExcludedMethods.
19 | # ExcludedMethods: refine
20 | Metrics/BlockLength:
21 |   Max: 235
22 | 
23 | # Offense count: 7
24 | # Configuration parameters: CountComments, CountAsOne, ExcludedMethods.
25 | Metrics/MethodLength:
26 |   Max: 25
27 | 
28 | Layout/LineLength:
29 |   Exclude:
30 |     - 'spec/**/*.rb'
31 | 
32 | Style/IfUnlessModifier:
33 |   Enabled: false
34 | 
35 | Style/StringLiterals:
36 |   EnforcedStyle: double_quotes
37 | 
38 | RSpec/MultipleExpectations:
39 |   Max: 10
40 | 
41 | RSpec/ExampleLength:
42 |   Max: 30
43 | 
44 | RSpec/InstanceVariable:
45 |   Enabled: false
46 | 
47 | RSpec/MessageSpies:
48 |   Enabled: false
49 | 
50 | RSpec/NestedGroups:
51 |   Max: 5
52 | 
53 | RSpec/MultipleMemoizedHelpers:
54 |   Max: 10
55 | 
56 | RSpec/BeforeAfterAll:
57 |   Enabled: false
58 | 
59 | RSpec/RepeatedExample:
60 |   Enabled: false
61 | 
62 | Style/Copyright: { Enabled: false }
63 | 


--------------------------------------------------------------------------------
/spec/feedjira/atom_entry_utilities_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | RSpec.describe Feedjira::AtomEntryUtilities do
 6 |   def klass
 7 |     Class.new do
 8 |       include SAXMachine
 9 |       include Feedjira::AtomEntryUtilities
10 |     end
11 |   end
12 | 
13 |   describe "#title" do
14 |     it "returns the title when set" do
15 |       entry = klass.new
16 |       entry.title = "My Title"
17 | 
18 |       expect(entry.title).to eq "My Title"
19 |     end
20 | 
21 |     it "returns a sanitized version of the raw title when present" do
22 |       entry = klass.new
23 |       entry.raw_title = "My <b>Raw</b> \tTitle"
24 | 
25 |       expect(entry.title).to eq "My Raw Title"
26 |     end
27 | 
28 |     it "returns nil when no raw title is present" do
29 |       entry = klass.new
30 | 
31 |       expect(entry.title).to be_nil
32 |     end
33 |   end
34 | 
35 |   describe "#url" do
36 |     it "returns the url when set" do
37 |       entry = klass.new
38 |       entry.url = "http://exampoo.com/feed"
39 | 
40 |       expect(entry.url).to eq "http://exampoo.com/feed"
41 |     end
42 | 
43 |     it "returns the first link when not set" do
44 |       entry = klass.new
45 |       entry.links = ["http://exampoo.com/feed"]
46 | 
47 |       expect(entry.url).to eq "http://exampoo.com/feed"
48 |     end
49 |   end
50 | end
51 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/atom_with_link_tag_for_url_unmarked.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | <?xml version="1.0" encoding="utf-8" standalone="yes" ?>
 3 | <feed xmlns="http://www.w3.org/2005/Atom">
 4 |  
 5 |   <title>Planet innoQ</title>
 6 |   <link rel="self" href="http://www.innoq.com/planet/atom.xml"/>
 7 |   <link href="http://www.innoq.com/planet/"/>
 8 |   <id>http://www.innoq.com/planet/atom.xml</id>
 9 |   <updated>2009-07-10T12:30:05+00:00</updated>
10 |   <generator uri="http://www.planetplanet.org/">Planet/1.0 +http://www.planetplanet.org</generator>
11 |  
12 |   <entry>
13 |     <title type="html">ja,</title>
14 |     <link href="http://www.innoq.com/blog/phaus/2009/07/ja.html"/>
15 |     <id>tag:www.innoq.com,2009:/blog/phaus//25.3526</id>
16 |     <updated>2009-07-01T22:20:05+00:00</updated>
17 |     <content type="html" xml:lang="en">ich lebe noch.
18 | Sobald mir mehr einfällt, schreibe ich mal wieder was :-).</content>
19 |     <author>
20 |       <name>Philipp Haussleiter</name>
21 |       <uri>http://www.innoq.com/blog/phaus/</uri>
22 |     </author>
23 |     <source>
24 |       <title type="html">Philipps paper equivalent Blog</title>
25 |       <link rel="self" href="http://www.innoq.com/blog/phaus/atom.xml"/>
26 |       <id>tag:www.innoq.com,2009:/blog/phaus//25</id>
27 |       <updated>2009-07-01T22:20:05+00:00</updated>
28 |     </source>
29 |   </entry>
30 |  
31 | </feed>
32 | 


--------------------------------------------------------------------------------
/lib/feedjira/feed.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   class Feed
 5 |     class << self
 6 |       def add_common_feed_element(element_tag, options = {})
 7 |         Feedjira.parsers.each do |k|
 8 |           k.element(element_tag, options)
 9 |         end
10 |       end
11 | 
12 |       def add_common_feed_elements(element_tag, options = {})
13 |         Feedjira.parsers.each do |k|
14 |           k.elements(element_tag, options)
15 |         end
16 |       end
17 | 
18 |       def add_common_feed_entry_element(element_tag, options = {})
19 |         call_on_each_feed_entry(:element, element_tag, options)
20 |       end
21 | 
22 |       def add_common_feed_entry_elements(element_tag, options = {})
23 |         call_on_each_feed_entry(:elements, element_tag, options)
24 |       end
25 | 
26 |       private
27 | 
28 |       def call_on_each_feed_entry(method, *parameters)
29 |         Feedjira.parsers.each do |klass|
30 |           klass.sax_config.collection_elements.each_value do |value|
31 |             collection_configs = value.select do |v|
32 |               v.accessor == "entries" && v.data_class.is_a?(Class)
33 |             end
34 | 
35 |             collection_configs.each do |config|
36 |               config.data_class.send(method, *parameters)
37 |             end
38 |           end
39 |         end
40 |       end
41 |     end
42 |   end
43 | end
44 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/atom_youtube_entry.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     class AtomYoutubeEntry
 6 |       include SAXMachine
 7 |       include FeedEntryUtilities
 8 |       include AtomEntryUtilities
 9 | 
10 |       sax_config.top_level_elements["link"].clear
11 |       sax_config.collection_elements["link"].clear
12 | 
13 |       element :link, as: :url, value: :href, with: { rel: "alternate" }
14 | 
15 |       element :"media:description", as: :content
16 |       element :"yt:videoId", as: :youtube_video_id
17 |       element :"yt:channelId", as: :youtube_channel_id
18 |       element :"media:title", as: :media_title
19 |       element :"media:content", as: :media_url, value: :url
20 |       element :"media:content", as: :media_type, value: :type
21 |       element :"media:content", as: :media_width, value: :width
22 |       element :"media:content", as: :media_height, value: :height
23 |       element :"media:thumbnail", as: :media_thumbnail_url, value: :url
24 |       element :"media:thumbnail", as: :media_thumbnail_width, value: :width
25 |       element :"media:thumbnail", as: :media_thumbnail_height, value: :height
26 |       element :"media:starRating", as: :media_star_count, value: :count
27 |       element :"media:starRating", as: :media_star_average, value: :average
28 |       element :"media:statistics", as: :media_views, value: :views
29 |     end
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/json_feed.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with JSON Feeds.
 6 |     class JSONFeed
 7 |       include SAXMachine
 8 |       include FeedUtilities
 9 | 
10 |       def self.able_to_parse?(json)
11 |         json.include?("https://jsonfeed.org/version/") ||
12 |           json.include?('https:\/\/jsonfeed.org\/version\/')
13 |       end
14 | 
15 |       def self.parse(json)
16 |         new(JSON.parse(json))
17 |       end
18 | 
19 |       attr_reader :json, :version, :title, :description, :url, :feed_url, :icon, :favicon,
20 |                   :language, :expired, :entries
21 | 
22 |       def initialize(json)
23 |         @json = json
24 |         @version = json.fetch("version")
25 |         @title = json.fetch("title")
26 |         @url = json.fetch("home_page_url", nil)
27 |         @feed_url = json.fetch("feed_url", nil)
28 |         @icon = json.fetch("icon", nil)
29 |         @favicon = json.fetch("favicon", nil)
30 |         @description = json.fetch("description", nil)
31 |         @language = json.fetch("language", nil)
32 |         @expired = json.fetch("expired", nil)
33 |         @entries = parse_items(json["items"])
34 |       end
35 | 
36 |       private
37 | 
38 |       def parse_items(items)
39 |         items.map do |item|
40 |           Feedjira::Parser::JSONFeedItem.new(item)
41 |         end
42 |       end
43 |     end
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/atom_youtube_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
 4 | 
 5 | describe Feedjira::Parser::AtomYoutube do
 6 |   describe "#will_parse?" do
 7 |     it "returns true for an atom youtube feed" do
 8 |       expect(described_class).to be_able_to_parse(sample_youtube_atom_feed)
 9 |     end
10 | 
11 |     it "returns fase for an atom feed" do
12 |       expect(described_class).not_to be_able_to_parse(sample_atom_feed)
13 |     end
14 | 
15 |     it "returns false for an rss feedburner feed" do
16 |       expect(described_class).not_to be_able_to_parse(sample_rss_feed_burner_feed)
17 |     end
18 |   end
19 | 
20 |   describe "parsing" do
21 |     before do
22 |       @feed = described_class.parse(sample_youtube_atom_feed)
23 |     end
24 | 
25 |     it "parses the title" do
26 |       expect(@feed.title).to eq "Google"
27 |     end
28 | 
29 |     it "parses the author" do
30 |       expect(@feed.author).to eq "Google Author"
31 |     end
32 | 
33 |     it "parses the url" do
34 |       expect(@feed.url).to eq "http://www.youtube.com/user/Google"
35 |     end
36 | 
37 |     it "parses the feed_url" do
38 |       expect(@feed.feed_url).to eq "http://www.youtube.com/feeds/videos.xml?user=google"
39 |     end
40 | 
41 |     it "parses the YouTube channel id" do
42 |       expect(@feed.youtube_channel_id).to eq "UCK8sQmJBp8GCxrOtXWBpyEA"
43 |     end
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/podlove_chapter_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Parser::PodloveChapter do
 6 |   before do
 7 |     @item = Feedjira::Parser::ITunesRSS.parse(sample_podlove_feed).entries.first
 8 |     @chapter = @item.chapters.first
 9 |   end
10 | 
11 |   it "parses chapters" do
12 |     expect(@item.chapters.size).to eq 15
13 |   end
14 | 
15 |   it "sorts chapters by time" do
16 |     expect(@item.chapters.last.title).to eq "Abschied"
17 |   end
18 | 
19 |   describe "#start" do
20 |     it "returns the start time" do
21 |       expect(@chapter.start_ntp).to eq "00:00:26.407"
22 |       expect(@chapter.start).to eq 26.407
23 |       expect(@item.chapters[1].start).to eq 50
24 |       expect(@item.chapters[2].start).to eq 59.12
25 |       expect(@item.chapters[3].start).to eq 89.201
26 |       expect(@item.chapters.last.start).to eq 5700.034
27 |     end
28 | 
29 |     it "returns nil when start_ntp is not present" do
30 |       chapter = described_class.new
31 | 
32 |       expect(chapter.start).to be_nil
33 |     end
34 |   end
35 | 
36 |   it "parses the title" do
37 |     expect(@chapter.title).to eq "Neil DeGrasse Tyson on Science"
38 |   end
39 | 
40 |   it "parses the link" do
41 |     expect(@chapter.url).to eq "https://example.com"
42 |   end
43 | 
44 |   it "parses the image" do
45 |     expect(@chapter.image).to eq "https://pics.example.com/pic.png"
46 |   end
47 | end
48 | 


--------------------------------------------------------------------------------
/lib/feedjira/atom_entry_utilities.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module AtomEntryUtilities
 5 |     def self.included(mod)
 6 |       mod.class_exec do
 7 |         element :title, as: :raw_title, with: { type: "html" }
 8 |         element :title, as: :raw_title, with: { type: "xhtml" }
 9 |         element :title, as: :raw_title, with: { type: "xml" }
10 |         element :title, as: :title, with: { type: "text" }
11 |         element :title, as: :title, with: { type: nil }
12 |         element :title, as: :title_type, value: :type
13 | 
14 |         element :name, as: :author
15 |         element :content
16 |         element :summary
17 |         element :enclosure, as: :image, value: :href
18 | 
19 |         element :published
20 |         element :id, as: :entry_id
21 |         element :created, as: :published
22 |         element :issued, as: :published
23 |         element :updated
24 |         element :modified, as: :updated
25 | 
26 |         elements :category, as: :categories, value: :term
27 | 
28 |         element :link, as: :url, value: :href, with: {
29 |           type: "text/html",
30 |           rel: "alternate"
31 |         }
32 | 
33 |         elements :link, as: :links, value: :href
34 |       end
35 |     end
36 | 
37 |     def title
38 |       @title ||=
39 |         case @raw_title
40 |         when String
41 |           Loofah.fragment(@raw_title).xpath("normalize-space(.)")
42 |         else
43 |           @title
44 |         end
45 |     end
46 | 
47 |     def url
48 |       @url ||= links.first
49 |     end
50 |   end
51 | end
52 | 


--------------------------------------------------------------------------------
/lib/feedjira/util/parse_time.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "time"
 4 | require "date"
 5 | 
 6 | module Feedjira
 7 |   module Util
 8 |     # Module for safely parsing time strings
 9 |     module ParseTime
10 |       # Parse a time string and convert it to UTC without raising errors.
11 |       # Parses a flattened 14-digit time (YYYYmmddHHMMMSS) as UTC.
12 |       #
13 |       # === Parameters
14 |       # [dt<String or Time>] Time definition to be parsed.
15 |       #
16 |       # === Returns
17 |       # A Time instance in UTC or nil if there were errors while parsing.
18 |       def self.call(datetime)
19 |         if datetime.is_a?(Time)
20 |           datetime.utc
21 |         elsif datetime.respond_to?(:to_datetime)
22 |           datetime.to_time.utc
23 |         else
24 |           parse_string_safely datetime.to_s
25 |         end
26 |       rescue StandardError => e
27 |         Feedjira.logger.debug("Failed to parse time #{datetime}")
28 |         Feedjira.logger.debug(e)
29 |         nil
30 |       end
31 | 
32 |       # Parse a string safely, handling special 14-digit format
33 |       #
34 |       # === Parameters
35 |       # [string<String>] String to be parsed as time.
36 |       #
37 |       # === Returns
38 |       # A Time instance in UTC or nil if there were errors while parsing.
39 |       def self.parse_string_safely(string)
40 |         return nil if string.empty?
41 | 
42 |         if /\A\d{14}\z/.match?(string)
43 |           Time.parse("#{string}Z", true)
44 |         else
45 |           Time.parse(string).utc
46 |         end
47 |       end
48 | 
49 |       private_class_method :parse_string_safely
50 |     end
51 |   end
52 | end
53 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/atom_feed_burner.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with Feedburner Atom feeds.
 6 |     class AtomFeedBurner
 7 |       include SAXMachine
 8 |       include FeedUtilities
 9 | 
10 |       element :title
11 |       element :subtitle, as: :description
12 |       element :link, as: :url_text_html, value: :href,
13 |                      with: { type: "text/html" }
14 |       element :link, as: :url_notype, value: :href, with: { type: nil }
15 |       element :link, as: :feed_url_link, value: :href, with: { type: "application/atom+xml" }
16 |       element :"atom10:link", as: :feed_url_atom10_link, value: :href,
17 |                               with: { type: "application/atom+xml" }
18 |       elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
19 |       elements :entry, as: :entries, class: AtomFeedBurnerEntry
20 | 
21 |       attr_writer :url, :feed_url
22 | 
23 |       def self.able_to_parse?(xml)
24 |         (xml.include?("<feed") && xml.include?("Atom") && xml.include?("feedburner") && !(/<rss|<rdf/ =~ xml)) || false
25 |       end
26 | 
27 |       # Feed url is <link> with type="text/html" if present,
28 |       # <link> with no type attribute otherwise
29 |       def url
30 |         @url || @url_text_html || @url_notype
31 |       end
32 | 
33 |       # Feed feed_url is <link> with type="application/atom+xml" if present,
34 |       # <atom10:link> with type="application/atom+xml" otherwise
35 |       def feed_url
36 |         @feed_url || @feed_url_link || @feed_url_atom10_link
37 |       end
38 | 
39 |       def self.preprocess(xml)
40 |         Preprocessor.new(xml).to_xml
41 |       end
42 |     end
43 |   end
44 | end
45 | 


--------------------------------------------------------------------------------
/spec/feedjira/util/parse_time_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | RSpec.describe Feedjira::Util::ParseTime do
 6 |   describe ".call" do
 7 |     it "returns the datetime in utc when given a Time" do
 8 |       time = Time.now
 9 | 
10 |       expect(described_class.call(time)).to eq(time.utc)
11 |     end
12 | 
13 |     it "returns the datetime in utc when given a Date" do
14 |       date = Date.today
15 | 
16 |       expect(described_class.call(date)).to eq(date.to_time.utc)
17 |     end
18 | 
19 |     it "returns the datetime in utc when given a String" do
20 |       timestamp = "2016-01-01 00:00:00"
21 | 
22 |       expect(described_class.call(timestamp)).to eq(Time.parse(timestamp).utc)
23 |     end
24 | 
25 |     it "returns nil when given an empty String" do
26 |       timestamp = ""
27 | 
28 |       expect(described_class.call(timestamp)).to be_nil
29 |     end
30 | 
31 |     it "returns the the datetime in utc given a 14-digit time" do
32 |       time = Time.now.utc
33 |       timestamp = time.strftime("%Y%m%d%H%M%S")
34 | 
35 |       expect(described_class.call(timestamp)).to eq(time.floor)
36 |     end
37 | 
38 |     context "when given an invalid time string" do
39 |       it "returns nil" do
40 |         timestamp = "2016-51-51 00:00:00"
41 | 
42 |         expect(described_class.call(timestamp)).to be_nil
43 |       end
44 | 
45 |       it "logs an error" do
46 |         timestamp = "2016-51-51 00:00:00"
47 | 
48 |         expect(Feedjira.logger)
49 |           .to receive(:debug).with("Failed to parse time #{timestamp}")
50 |         expect(Feedjira.logger)
51 |           .to receive(:debug).with(an_instance_of(ArgumentError))
52 | 
53 |         described_class.call(timestamp)
54 |       end
55 |     end
56 |   end
57 | end
58 | 


--------------------------------------------------------------------------------
/lib/feedjira/rss_entry_utilities.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module RSSEntryUtilities
 5 |     # rubocop:todo Metrics/MethodLength
 6 |     def self.included(mod) # rubocop:todo Metrics/AbcSize, Metrics/MethodLength
 7 |       mod.class_exec do
 8 |         element :title
 9 | 
10 |         element :"content:encoded", as: :content
11 |         element :"a10:content", as: :content
12 |         element :description, as: :summary
13 | 
14 |         element :link, as: :url
15 |         element :"a10:link", as: :url, value: :href
16 | 
17 |         element :author
18 |         element :"dc:creator", as: :author
19 |         element :"a10:name", as: :author
20 | 
21 |         element :pubDate, as: :published
22 |         element :pubdate, as: :published
23 |         element :issued, as: :published
24 |         element :"dc:date", as: :published
25 |         element :"dc:Date", as: :published
26 |         element :"dcterms:created", as: :published
27 | 
28 |         element :"dcterms:modified", as: :updated
29 |         element :"a10:updated", as: :updated
30 | 
31 |         element :guid, as: :entry_id, class: Feedjira::Parser::GloballyUniqueIdentifier
32 |         element :"dc:identifier", as: :dc_identifier
33 | 
34 |         element :"media:thumbnail", as: :image, value: :url
35 |         element :"media:content", as: :image, value: :url
36 |         element :enclosure, as: :image, value: :url
37 | 
38 |         element :comments
39 | 
40 |         elements :category, as: :categories
41 |       end
42 |     end
43 |     # rubocop:enable Metrics/MethodLength
44 | 
45 |     def entry_id
46 |       @entry_id&.guid
47 |     end
48 | 
49 |     def url
50 |       @url || @entry_id&.url
51 |     end
52 | 
53 |     def id
54 |       entry_id || @dc_identifier || @url
55 |     end
56 |   end
57 | end
58 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/atom_google_alerts_entry_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Parser::AtomGoogleAlertsEntry do
 6 |   before do
 7 |     feed = Feedjira::Parser::AtomGoogleAlerts.parse sample_google_alerts_atom_feed
 8 |     @entry = feed.entries.first
 9 |   end
10 | 
11 |   it "parses the title" do
12 |     expect(@entry.title).to eq "Report offers Prediction of Automotive Slack Market by Top key players like Haldex, Meritor, Bendix ..."
13 |     expect(@entry.raw_title).to eq "Report offers Prediction of Automotive <b>Slack</b> Market by Top key players like Haldex, Meritor, Bendix ..."
14 |     expect(@entry.title_type).to eq "html"
15 |   end
16 | 
17 |   it "parses the url out of the params when the host is google" do
18 |     url = "https://www.exampoo.com"
19 |     entry = described_class.new(url: "https://www.google.com/url?url=#{url}")
20 | 
21 |     expect(entry.url).to eq url
22 |   end
23 | 
24 |   it "returns nil when the url is not present" do
25 |     entry = described_class.new
26 | 
27 |     expect(entry.url).to be_nil
28 |   end
29 | 
30 |   it "returns nil when the host is not google" do
31 |     entry = described_class.new(url: "https://www.exampoo.com")
32 | 
33 |     expect(entry.url).to be_nil
34 |   end
35 | 
36 |   it "parses the content" do
37 |     expect(@entry.content).to eq "Automotive <b>Slack</b> Market reports provides a comprehensive overview of the global market size and share. It provides strategists, marketers and senior&nbsp;..."
38 |   end
39 | 
40 |   it "parses the published date" do
41 |     published = Feedjira::Util::ParseTime.call "2019-07-10T11:53:37Z"
42 |     expect(@entry.published).to eq published
43 |   end
44 | 
45 |   it "parses the updated date" do
46 |     updated = Feedjira::Util::ParseTime.call "2019-07-10T11:53:37Z"
47 |     expect(@entry.updated).to eq updated
48 |   end
49 | end
50 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/json_feed_item.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # Parser for dealing with JSON Feed items.
 6 |     class JSONFeedItem
 7 |       include FeedEntryUtilities
 8 | 
 9 |       attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
10 |                   :published, :updated, :image, :banner_image, :author, :categories
11 | 
12 |       def initialize(json)
13 |         @json = json
14 |         @entry_id = json.fetch("id")
15 |         @url = json.fetch("url")
16 |         @external_url = json.fetch("external_url", nil)
17 |         @title = json.fetch("title", nil)
18 |         @content = parse_content(json.fetch("content_html", nil), json.fetch("content_text", nil))
19 |         @summary = json.fetch("summary", nil)
20 |         @image = json.fetch("image", nil)
21 |         @banner_image = json.fetch("banner_image", nil)
22 |         @published = parse_published(json.fetch("date_published", nil))
23 |         @updated = parse_updated(json.fetch("date_modified", nil))
24 |         @author = author_name(json.fetch("author", nil))
25 |         @categories = json.fetch("tags", [])
26 |       end
27 | 
28 |       private
29 | 
30 |       def parse_published(date_published)
31 |         return nil unless date_published
32 | 
33 |         Feedjira::Util::ParseTime.call(date_published)
34 |       end
35 | 
36 |       def parse_updated(date_modified)
37 |         return nil unless date_modified
38 | 
39 |         Feedjira::Util::ParseTime.call(date_modified)
40 |       end
41 | 
42 |       # Convenience method to return the included content type.
43 |       # Prefer content_html unless it isn't included.
44 |       def parse_content(content_html, content_text)
45 |         return content_html unless content_html.nil?
46 | 
47 |         content_text
48 |       end
49 | 
50 |       def author_name(author_obj)
51 |         return nil if author_obj.nil?
52 | 
53 |         author_obj["name"]
54 |       end
55 |     end
56 |   end
57 | end
58 | 


--------------------------------------------------------------------------------
/lib/feedjira/configuration.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | # Feedjira::Configuration
 4 | module Feedjira
 5 |   # Provides global configuration options for Feedjira
 6 |   #
 7 |   # @example Set configuration options using a block
 8 |   #   Feedjira.configure do |config|
 9 |   #     config.strip_whitespace = true
10 |   #   end
11 |   module Configuration
12 |     attr_accessor(
13 |       :logger,
14 |       :parsers,
15 |       :strip_whitespace
16 |     )
17 | 
18 |     # Modify Feedjira's current configuration
19 |     #
20 |     # @yieldparam [Feedjria] config current Feedjira config
21 |     # @example
22 |     #   Feedjira.configure do |config|
23 |     #     config.strip_whitespace = true
24 |     #   end
25 |     def configure
26 |       yield self
27 |     end
28 | 
29 |     # Reset Feedjira's configuration to defaults
30 |     #
31 |     # @example
32 |     #   Feedjira.reset_configuration!
33 |     def reset_configuration!
34 |       set_default_configuration
35 |     end
36 | 
37 |     # @private
38 |     def self.extended(base)
39 |       base.set_default_configuration
40 |     end
41 | 
42 |     # @private
43 |     def set_default_configuration
44 |       self.logger = default_logger
45 |       self.parsers = default_parsers
46 |       self.strip_whitespace = false
47 |     end
48 | 
49 |     private
50 | 
51 |     # @private
52 |     def default_logger
53 |       Logger.new($stdout).tap do |logger|
54 |         logger.progname = "Feedjira"
55 |         logger.level = Logger::WARN
56 |       end
57 |     end
58 | 
59 |     # @private
60 |     def default_parsers
61 |       [
62 |         Feedjira::Parser::ITunesRSS,
63 |         Feedjira::Parser::RSSFeedBurner,
64 |         Feedjira::Parser::GoogleDocsAtom,
65 |         Feedjira::Parser::AtomYoutube,
66 |         Feedjira::Parser::AtomFeedBurner,
67 |         Feedjira::Parser::AtomGoogleAlerts,
68 |         Feedjira::Parser::Atom,
69 |         Feedjira::Parser::RSS,
70 |         Feedjira::Parser::JSONFeed
71 |       ]
72 |     end
73 |   end
74 | end
75 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/atom_google_alerts_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | module Feedjira
 6 |   module Parser
 7 |     describe "#able_to_parse?" do
 8 |       it "returns true for a Google Alerts atom feed" do
 9 |         expect(AtomGoogleAlerts).to be_able_to_parse(sample_google_alerts_atom_feed)
10 |       end
11 | 
12 |       it "returns false for an rdf feed" do
13 |         expect(AtomGoogleAlerts).not_to be_able_to_parse(sample_rdf_feed)
14 |       end
15 | 
16 |       it "returns false for a regular atom feed" do
17 |         expect(AtomGoogleAlerts).not_to be_able_to_parse(sample_atom_feed)
18 |       end
19 | 
20 |       it "returns false for a feedburner atom feed" do
21 |         expect(AtomGoogleAlerts).not_to be_able_to_parse(sample_feedburner_atom_feed)
22 |       end
23 |     end
24 | 
25 |     describe "parsing" do
26 |       before do
27 |         @feed = AtomGoogleAlerts.parse(sample_google_alerts_atom_feed)
28 |       end
29 | 
30 |       it "parses the title" do
31 |         expect(@feed.title).to eq "Google Alert - Slack"
32 |       end
33 | 
34 |       it "parses the descripton" do
35 |         expect(@feed.description).to be_nil
36 |       end
37 | 
38 |       it "parses the url" do
39 |         expect(@feed.url).to eq "https://www.google.com/alerts/feeds/04175468913983673025/4428013283581841004"
40 |       end
41 | 
42 |       it "parses the feed_url" do
43 |         expect(@feed.feed_url).to eq "https://www.google.com/alerts/feeds/04175468913983673025/4428013283581841004"
44 |       end
45 | 
46 |       it "parses entries" do
47 |         expect(@feed.entries.size).to eq 20
48 |       end
49 |     end
50 | 
51 |     describe "preprocessing" do
52 |       it "retains markup in xhtml content" do
53 |         AtomGoogleAlerts.preprocess_xml = true
54 | 
55 |         feed = AtomGoogleAlerts.parse sample_google_alerts_atom_feed
56 |         entry = feed.entries.first
57 | 
58 |         expect(entry.content).to include("<b>Slack</b>")
59 |       end
60 |     end
61 |   end
62 | end
63 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/itunes_rss_item.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # iTunes extensions to the standard RSS2.0 item
 6 |     # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
 7 |     class ITunesRSSItem
 8 |       include SAXMachine
 9 |       include FeedEntryUtilities
10 |       include RSSEntryUtilities
11 | 
12 |       sax_config.top_level_elements["enclosure"].clear
13 | 
14 |       # If author is not present use author tag on the item
15 |       element :"itunes:author", as: :itunes_author
16 |       element :"itunes:block", as: :itunes_block
17 |       element :"itunes:duration", as: :itunes_duration
18 |       element :"itunes:explicit", as: :itunes_explicit
19 |       element :"itunes:keywords", as: :itunes_keywords
20 |       element :"itunes:subtitle", as: :itunes_subtitle
21 |       element :"itunes:image", value: :href, as: :itunes_image
22 |       element :"itunes:isClosedCaptioned", as: :itunes_closed_captioned
23 |       element :"itunes:order", as: :itunes_order
24 |       element :"itunes:season", as: :itunes_season
25 |       element :"itunes:episode", as: :itunes_episode
26 |       element :"itunes:title", as: :itunes_title
27 |       element :"itunes:episodeType", as: :itunes_episode_type
28 | 
29 |       # If summary is not present, use the description tag
30 |       element :"itunes:summary", as: :itunes_summary
31 |       element :enclosure, value: :length, as: :enclosure_length
32 |       element :enclosure, value: :type, as: :enclosure_type
33 |       element :enclosure, value: :url, as: :enclosure_url
34 |       elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
35 | 
36 |       # Podlove requires clients to re-order by start time in the
37 |       # event the publisher doesn't provide them in that
38 |       # order. SAXMachine doesn't have any sort capability afaik, so
39 |       # we have to sort chapters manually.
40 |       def chapters
41 |         raw_chapters.sort_by(&:start)
42 |       end
43 |     end
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/google_docs_atom_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | module Feedjira
 6 |   module Parser
 7 |     describe ".able_to_parser?" do
 8 |       it "returns true for Google Docs feed" do
 9 |         expect(GoogleDocsAtom).to be_able_to_parse(sample_google_docs_list_feed)
10 |       end
11 | 
12 |       it "is not able to parse another Atom feed" do
13 |         expect(GoogleDocsAtom).not_to be_able_to_parse(sample_atom_feed)
14 |       end
15 |     end
16 | 
17 |     describe "parsing" do
18 |       before do
19 |         @feed = GoogleDocsAtom.parse(sample_google_docs_list_feed)
20 |       end
21 | 
22 |       it "returns a bunch of objects" do
23 |         expect(@feed.entries).not_to be_empty
24 |       end
25 | 
26 |       it "populates a title, interhited from the Atom entry" do
27 |         expect(@feed.title).not_to be_nil
28 |       end
29 | 
30 |       it "returns a bunch of entries of type GoogleDocsAtomEntry" do
31 |         expect(@feed.entries.first).to be_a GoogleDocsAtomEntry
32 |       end
33 |     end
34 | 
35 |     describe "#url" do
36 |       it "returns the url when set" do
37 |         feed = GoogleDocsAtom.new
38 | 
39 |         feed.url = "http://exampoo.com/feed"
40 | 
41 |         expect(feed.url).to eq "http://exampoo.com/feed"
42 |       end
43 | 
44 |       it "returns the first link when not set" do
45 |         feed = GoogleDocsAtom.new
46 | 
47 |         feed.links = ["http://exampoo.com/feed"]
48 | 
49 |         expect(feed.url).to eq "http://exampoo.com/feed"
50 |       end
51 |     end
52 | 
53 |     describe "#feed_url" do
54 |       it "returns the feed_url when set" do
55 |         feed = GoogleDocsAtom.new
56 | 
57 |         feed.feed_url = "http://exampoo.com/feed"
58 | 
59 |         expect(feed.feed_url).to eq "http://exampoo.com/feed"
60 |       end
61 | 
62 |       it "returns the first link when not set" do
63 |         feed = GoogleDocsAtom.new
64 | 
65 |         feed.links = ["http://exampoo.com/feed"]
66 | 
67 |         expect(feed.feed_url).to eq "http://exampoo.com/feed"
68 |       end
69 |     end
70 |   end
71 | end
72 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/json_feed_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | module Feedjira
 6 |   module Parser
 7 |     describe ".able_to_parse?" do
 8 |       it "returns true for a JSON feed" do
 9 |         expect(JSONFeed).to be_able_to_parse(sample_json_feed)
10 |       end
11 | 
12 |       it "returns true for a JSON feed with escaped URIs" do
13 |         expect(JSONFeed).to be_able_to_parse(sample_json_feed_with_escaped_uris)
14 |       end
15 | 
16 |       it "returns false for an RSS feed" do
17 |         expect(JSONFeed).not_to be_able_to_parse(sample_rss_feed)
18 |       end
19 | 
20 |       it "returns false for an Atom feed" do
21 |         expect(JSONFeed).not_to be_able_to_parse(sample_atom_feed)
22 |       end
23 |     end
24 | 
25 |     describe "parsing" do
26 |       before do
27 |         @feed = JSONFeed.parse(sample_json_feed)
28 |       end
29 | 
30 |       it "parses the version" do
31 |         expect(@feed.version).to eq "https://jsonfeed.org/version/1"
32 |       end
33 | 
34 |       it "parses the title" do
35 |         expect(@feed.title).to eq "inessential.com"
36 |       end
37 | 
38 |       it "parses the url" do
39 |         expect(@feed.url).to eq "http://inessential.com/"
40 |       end
41 | 
42 |       it "parses the feed_url" do
43 |         expect(@feed.feed_url).to eq "http://inessential.com/feed.json"
44 |       end
45 | 
46 |       it "parses the description" do
47 |         expect(@feed.description).to eq "Brent Simmons’s weblog."
48 |       end
49 | 
50 |       it "parses the favicon" do
51 |         expect(@feed.favicon).to eq "http://inessential.com/favicon.ico"
52 |       end
53 | 
54 |       it "parses the icon" do
55 |         expect(@feed.icon).to eq "http://inessential.com/icon.png"
56 |       end
57 | 
58 |       it "parses the language" do
59 |         expect(@feed.language).to eq "en-US"
60 |       end
61 | 
62 |       it "parses expired and return default (nil)" do
63 |         expect(@feed.expired).to be_nil
64 |       end
65 | 
66 |       it "parses entries" do
67 |         expect(@feed.entries.size).to eq 20
68 |       end
69 |     end
70 |   end
71 | end
72 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/HREFConsideredHarmfulFirstEntry.xml:
--------------------------------------------------------------------------------
 1 | <p>There's lots to like about Google's new web browser, <a href="http://www.google.com/chrome">Chrome</a>, which was released today.&nbsp; When I read the awesome <a href="http://www.google.com/googlebooks/chrome/">comic strip introduction</a> yesterday, however, the thing that stood out most for me was in very small type: the name Lars Bak attached to the V8 JavaScript engine.&nbsp; I know of Lars from his work on Self, Strongtalk, HotSpot and OOVM, and his involvement in V8 says a lot about the kind of language implementation it will be.&nbsp; David Griswold has posted some <a href="http://groups.google.com/group/strongtalk-general/browse_thread/thread/40eb8f405fbd3041">more information </a>on the Strongtalk list:
 2 | 
 3 | </p><blockquote><p>
 4 | The V8 development team has multiple members of the original
 5 | Animorphic team; it is headed by Lars Bak, who was the technical lead
 6 | for both Strongtalk and the HotSpot Java VM (as well as a huge
 7 | contributor to the original Self VM).&nbsp; &nbsp;I think that you will find
 8 | that V8 has a lot of the creamy goodness of the Strongtalk and Self
 9 | VMs, with many big architectural improvements
10 | </p></blockquote><p>
11 | 
12 | I'll post more on this later, but things are getting interesting...</p>
13 | 
14 | <p>Update: the V8 code is already <a href="http://code.google.com/apis/v8/">available</a>, and builds and runs fine on Mac OS X.&nbsp; From the <a href="http://code.google.com/apis/v8/design.html">design docs</a>, it's pretty clear that this is indeed what I was hoping for: a mainstream, open source dynamic language implementation that learned and applies the lessons from Smalltalk, Self and Strongtalk.&nbsp; Most telling are that the only two papers cited in that document are titled &quot;An Efficient Implementation of Self&quot; and &quot;An Efficient Implementation of the Smalltalk-80 System&quot;.</p>
15 | 
16 | <p>The &quot;classes as nodes in a state machine&quot; trick for expando properties is especially neat.</p>
17 | 
18 | <p>The bad news: V8 is over 100,000 lines of C++.</p>
19 | 
20 | <p>&nbsp;</p>
21 | 
22 | <p><img src="http://www.avibryant.com/files/picture_19.png" /></p>


--------------------------------------------------------------------------------
/spec/feedjira/parser/rss_feed_burner_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | module Feedjira
 6 |   module Parser
 7 |     describe "#will_parse?" do
 8 |       it "returns true for a feedburner rss feed" do
 9 |         expect(RSSFeedBurner).to be_able_to_parse sample_rss_feed_burner_feed
10 |       end
11 | 
12 |       it "returns false for a regular RSS feed" do
13 |         expect(RSSFeedBurner).not_to be_able_to_parse sample_rss_feed
14 |       end
15 | 
16 |       it "returns false for a feedburner atom feed" do
17 |         expect(RSSFeedBurner).not_to be_able_to_parse sample_feedburner_atom_feed
18 |       end
19 | 
20 |       it "returns false for an rdf feed" do
21 |         expect(RSSFeedBurner).not_to be_able_to_parse sample_rdf_feed
22 |       end
23 | 
24 |       it "returns false for a regular atom feed" do
25 |         expect(RSSFeedBurner).not_to be_able_to_parse sample_atom_feed
26 |       end
27 |     end
28 | 
29 |     describe "parsing" do
30 |       before do
31 |         @feed = RSSFeedBurner.parse(sample_rss_feed_burner_feed)
32 |       end
33 | 
34 |       it "parses the title" do
35 |         expect(@feed.title).to eq "TechCrunch"
36 |       end
37 | 
38 |       it "parses the description" do
39 |         description = "TechCrunch is a group-edited blog that profiles the companies, products and events defining and transforming the new web."
40 |         expect(@feed.description).to eq description
41 |       end
42 | 
43 |       it "parses the url" do
44 |         expect(@feed.url).to eq "http://techcrunch.com"
45 |       end
46 | 
47 |       it "parses the last build date" do
48 |         expect(@feed.last_built).to eq "Wed, 02 Nov 2011 17:29:59 +0000"
49 |       end
50 | 
51 |       it "parses the hub urls" do
52 |         expect(@feed.hubs.count).to eq 2
53 |         expect(@feed.hubs.first).to eq "http://pubsubhubbub.appspot.com/"
54 |       end
55 | 
56 |       it "provides an accessor for the feed_url" do
57 |         expect(@feed).to respond_to :feed_url
58 |         expect(@feed).to respond_to :feed_url=
59 |       end
60 | 
61 |       it "parses entries" do
62 |         expect(@feed.entries.size).to eq 20
63 |       end
64 |     end
65 |   end
66 | end
67 | 


--------------------------------------------------------------------------------
/lib/feedjira/feed_entry_utilities.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module FeedEntryUtilities
 5 |     include Enumerable
 6 | 
 7 |     def published
 8 |       @published ||= @updated
 9 |     end
10 | 
11 |     def parse_datetime(string)
12 |       DateTime.parse(string).to_time.utc
13 |     rescue StandardError => e
14 |       Feedjira.logger.debug("Failed to parse date #{string.inspect}")
15 |       Feedjira.logger.debug(e)
16 |       nil
17 |     end
18 | 
19 |     ##
20 |     # Returns the id of the entry or its url if not id is present, as some
21 |     # formats don't support it
22 |     # rubocop:disable Naming/MemoizedInstanceVariableName
23 |     def id
24 |       @entry_id ||= @url
25 |     end
26 |     # rubocop:enable Naming/MemoizedInstanceVariableName
27 | 
28 |     ##
29 |     # Writer for published. By default, we keep the "oldest" publish time found.
30 |     def published=(val)
31 |       parsed = parse_datetime(val)
32 |       @published = parsed if parsed && (!@published || parsed < @published)
33 |     end
34 | 
35 |     ##
36 |     # Writer for updated. By default, we keep the most recent update time found.
37 |     def updated=(val)
38 |       parsed = parse_datetime(val)
39 |       @updated = parsed if parsed && (!@updated || parsed > @updated)
40 |     end
41 | 
42 |     def sanitize!
43 |       %w[title author summary content image].each do |name|
44 |         next unless respond_to?(name)
45 | 
46 |         current_value = send(name)
47 |         if current_value.is_a?(String)
48 |           send(:"#{name}=", Loofah.scrub_fragment(current_value, :prune).to_s)
49 |         end
50 |       end
51 |     end
52 | 
53 |     alias last_modified published
54 | 
55 |     def each
56 |       @rss_fields ||= instance_variables.map do |ivar|
57 |         ivar.to_s.sub("@", "")
58 |       end.select do |field| # rubocop:disable Style/MultilineBlockChain
59 |         # select callable (public) methods only
60 |         respond_to?(field)
61 |       end
62 | 
63 |       @rss_fields.each do |field|
64 |         yield(field, instance_variable_get(:"@#{field}"))
65 |       end
66 |     end
67 | 
68 |     def [](field)
69 |       instance_variable_get(:"@#{field}")
70 |     end
71 | 
72 |     def []=(field, value)
73 |       instance_variable_set(:"@#{field}", value)
74 |     end
75 |   end
76 | end
77 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/atom_feed_burner_entry_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Parser::AtomFeedBurnerEntry do
 6 |   before do
 7 |     Feedjira::Parser::AtomFeedBurner.preprocess_xml = false
 8 |     # I don't really like doing it this way because these unit test should only
 9 |     # rely on AtomEntry, but this is actually how it should work. You would
10 |     # never just pass entry xml straight to the AtomEnry
11 |     feed = Feedjira::Parser::AtomFeedBurner.parse sample_feedburner_atom_feed
12 |     @entry = feed.entries.first
13 |   end
14 | 
15 |   it "parses the title" do
16 |     expect(@entry.title).to eq "Making a Ruby C library even faster"
17 |   end
18 | 
19 |   it "is able to fetch a url via the 'alternate' rel if no origLink exists" do
20 |     xml = File.read("#{File.dirname(__FILE__)}/../../sample_feeds/PaulDixExplainsNothingAlternate.xml")
21 |     entry = Feedjira::Parser::AtomFeedBurner.parse(xml).entries.first
22 |     expect(entry.url).to eq("http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/519925023/making-a-ruby-c-library-even-faster.html")
23 |   end
24 | 
25 |   it "parses the url" do
26 |     expect(@entry.url).to eq "http://www.pauldix.net/2009/01/making-a-ruby-c-library-even-faster.html"
27 |   end
28 | 
29 |   it "parses the url when there is no alternate" do
30 |     xml = File.read("#{File.dirname(__FILE__)}/../../sample_feeds/FeedBurnerUrlNoAlternate.xml")
31 |     entry = Feedjira::Parser::AtomFeedBurner.parse(xml).entries.first
32 |     expect(entry.url).to eq "http://example.com/QQQQ.html"
33 |   end
34 | 
35 |   it "parses the author" do
36 |     expect(@entry.author).to eq "Paul Dix"
37 |   end
38 | 
39 |   it "parses the content" do
40 |     expect(@entry.content).to eq sample_feedburner_atom_entry_content
41 |   end
42 | 
43 |   it "provides a summary" do
44 |     summary = "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
45 |     expect(@entry.summary).to eq summary
46 |   end
47 | 
48 |   it "parses the published date" do
49 |     published = Feedjira::Util::ParseTime.call "Thu Jan 22 15:50:22 UTC 2009"
50 |     expect(@entry.published).to eq published
51 |   end
52 | 
53 |   it "parses the categories" do
54 |     expect(@entry.categories).to eq ["Ruby", "Another Category"]
55 |   end
56 | end
57 | 


--------------------------------------------------------------------------------
/spec/sample_feeds.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module SampleFeeds
 4 |   FEEDS = {
 5 |     sample_atom_feed: "AmazonWebServicesBlog.xml",
 6 |     sample_atom_simple: "atom_simple_single_entry.xml",
 7 |     sample_atom_simple_link_self: "atom_simple_single_entry_link_self.xml",
 8 |     sample_atom_middleman_feed: "FeedjiraBlog.xml",
 9 |     sample_atom_xhtml_feed: "pet_atom.xml",
10 |     sample_atom_feed_line_breaks: "AtomFeedWithSpacesAroundEquals.xml",
11 |     sample_atom_entry_content: "AmazonWebServicesBlogFirstEntryContent.xml",
12 |     sample_itunes_feed: "itunes.xml",
13 |     sample_itunes_feedburner_feed: "itunes_feedburner.xml",
14 |     sample_itunes_feed_with_single_quotes: "ITunesWithSingleQuotedAttributes.xml",
15 |     sample_itunes_feed_with_spaces: "ITunesWithSpacesInAttributes.xml",
16 |     sample_podlove_feed: "CRE.xml",
17 |     sample_rdf_feed: "HREFConsideredHarmful.xml",
18 |     sample_rdf_entry_content: "HREFConsideredHarmfulFirstEntry.xml",
19 |     sample_rss_feed_burner_feed: "TechCrunch.xml",
20 |     sample_rss_feed_burner_entry_content: "TechCrunchFirstEntry.xml",
21 |     sample_rss_feed_burner_entry_description: "TechCrunchFirstEntryDescription.xml",
22 |     sample_rss_feed: "TenderLovemaking.xml",
23 |     sample_rss_entry_content: "TenderLovemakingFirstEntry.xml",
24 |     sample_feedburner_atom_feed: "PaulDixExplainsNothing.xml",
25 |     sample_feedburner_atom_feed_alternate: "GiantRobotsSmashingIntoOtherGiantRobots.xml",
26 |     sample_feedburner_atom_entry_content: "PaulDixExplainsNothingFirstEntryContent.xml",
27 |     sample_google_alerts_atom_feed: "google_alerts_atom.xml",
28 |     sample_wfw_feed: "PaulDixExplainsNothingWFW.xml",
29 |     sample_google_docs_list_feed: "GoogleDocsList.xml",
30 |     sample_feed_burner_atom_xhtml_feed: "FeedBurnerXHTML.xml",
31 |     sample_duplicate_content_atom_feed: "DuplicateContentAtomFeed.xml",
32 |     sample_youtube_atom_feed: "youtube_atom.xml",
33 |     sample_atom_xhtml_with_escpaed_html_in_pre_tag_feed: "AtomEscapedHTMLInPreTag.xml",
34 |     sample_json_feed: "json_feed.json",
35 |     sample_json_feed_with_escaped_uris: "json_feed_with_escaped_uris.json",
36 |     sample_rss_feed_huffpost_ca: "HuffPostCanada.xml",
37 |     sample_invalid_date_format_feed: "InvalidDateFormat.xml",
38 |     sample_rss_feed_permalinks: "Permalinks.xml",
39 |     sample_rss_feed_with_a10_namespace: "a10.xml",
40 |     sample_rss_feed_with_comments: "RSSWithComments.xml"
41 |   }.freeze
42 | 
43 |   FEEDS.each do |method, filename|
44 |     define_method(method) { load_sample filename }
45 |   end
46 | 
47 |   def load_sample(filename)
48 |     File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
49 |   end
50 | end
51 | 


--------------------------------------------------------------------------------
/lib/feedjira/parser/itunes_rss.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Feedjira
 4 |   module Parser
 5 |     # iTunes is RSS 2.0 + some apple extensions
 6 |     # Sources:
 7 |     #   * https://cyber.harvard.edu/rss/rss.html
 8 |     #   * http://lists.apple.com/archives/syndication-dev/2005/Nov/msg00002.html
 9 |     #   * https://help.apple.com/itc/podcasts_connect/
10 |     class ITunesRSS
11 |       include SAXMachine
12 |       include FeedUtilities
13 | 
14 |       attr_accessor :feed_url
15 | 
16 |       # RSS 2.0 elements that need including
17 |       element :copyright
18 |       element :description
19 |       element :image, class: RSSImage
20 |       element :language
21 |       element :lastBuildDate, as: :last_built
22 |       element :link, as: :url
23 |       element :managingEditor, as: :managing_editor
24 |       element :rss, as: :version, value: :version
25 |       element :title
26 |       element :ttl
27 | 
28 |       # If author is not present use managingEditor on the channel
29 |       element :"itunes:author", as: :itunes_author
30 |       element :"itunes:block", as: :itunes_block
31 |       element :"itunes:image", value: :href, as: :itunes_image
32 |       element :"itunes:explicit", as: :itunes_explicit
33 |       element :"itunes:complete", as: :itunes_complete
34 |       element :"itunes:keywords", as: :itunes_keywords
35 |       element :"itunes:type", as: :itunes_type
36 | 
37 |       # New URL for the podcast feed
38 |       element :"itunes:new_feed_url", as: :itunes_new_feed_url
39 |       element :"itunes:subtitle", as: :itunes_subtitle
40 | 
41 |       # If summary is not present, use the description tag
42 |       element :"itunes:summary", as: :itunes_summary
43 | 
44 |       # iTunes RSS feeds can have multiple main categories and multiple
45 |       # sub-categories per category.
46 |       elements :"itunes:category", as: :_itunes_categories,
47 |                                    class: ITunesRSSCategory
48 |       private :_itunes_categories
49 | 
50 |       def itunes_categories
51 |         _itunes_categories.flat_map do |itunes_category|
52 |           itunes_category.enum_for(:each_subcategory).to_a
53 |         end
54 |       end
55 | 
56 |       def itunes_category_paths
57 |         _itunes_categories.flat_map do |itunes_category|
58 |           itunes_category.enum_for(:each_path).to_a
59 |         end
60 |       end
61 | 
62 |       elements :"itunes:owner", as: :itunes_owners, class: ITunesRSSOwner
63 |       elements :item, as: :entries, class: ITunesRSSItem
64 | 
65 |       def self.able_to_parse?(xml)
66 |         %r{xmlns:itunes\s?=\s?["']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd["']}i =~ xml
67 |       end
68 |     end
69 |   end
70 | end
71 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/rss_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Parser::RSS do
 6 |   describe "#will_parse?" do
 7 |     it "returns true for an RSS feed" do
 8 |       expect(described_class).to be_able_to_parse(sample_rss_feed)
 9 |     end
10 | 
11 |     it "returns false for an atom feed" do
12 |       expect(described_class).not_to be_able_to_parse(sample_atom_feed)
13 |     end
14 | 
15 |     it "returns false for an rss feedburner feed" do
16 |       able = described_class.able_to_parse? sample_rss_feed_burner_feed
17 |       expect(able).to be false
18 |     end
19 |   end
20 | 
21 |   describe "parsing" do
22 |     before do
23 |       @feed = described_class.parse(sample_rss_feed)
24 |     end
25 | 
26 |     it "parses the version" do
27 |       expect(@feed.version).to eq "2.0"
28 |     end
29 | 
30 |     it "parses the title" do
31 |       expect(@feed.title).to eq "Tender Lovemaking"
32 |     end
33 | 
34 |     it "parses the description" do
35 |       expect(@feed.description).to eq "The act of making love, tenderly."
36 |     end
37 | 
38 |     it "parses the url" do
39 |       expect(@feed.url).to eq "http://tenderlovemaking.com"
40 |     end
41 | 
42 |     it "parses the ttl" do
43 |       expect(@feed.ttl).to eq "60"
44 |     end
45 | 
46 |     it "parses the last build date" do
47 |       expect(@feed.last_built).to eq "Sat, 07 Sep 2002 09:42:31 GMT"
48 |     end
49 | 
50 |     it "parses the hub urls" do
51 |       expect(@feed.hubs.count).to eq 1
52 |       expect(@feed.hubs.first).to eq "http://pubsubhubbub.appspot.com/"
53 |     end
54 | 
55 |     it "provides an accessor for the feed_url" do
56 |       expect(@feed).to respond_to :feed_url
57 |       expect(@feed).to respond_to :feed_url=
58 |     end
59 | 
60 |     it "parses the language" do
61 |       expect(@feed.language).to eq "en"
62 |     end
63 | 
64 |     it "parses the image url" do
65 |       expect(@feed.image.url).to eq "https://tenderlovemaking.com/images/header-logo-text-trimmed.png"
66 |     end
67 | 
68 |     it "parses the image title" do
69 |       expect(@feed.image.title).to eq "Tender Lovemaking"
70 |     end
71 | 
72 |     it "parses the image link" do
73 |       expect(@feed.image.link).to eq "http://tenderlovemaking.com"
74 |     end
75 | 
76 |     it "parses the image width" do
77 |       expect(@feed.image.width).to eq "766"
78 |     end
79 | 
80 |     it "parses the image height" do
81 |       expect(@feed.image.height).to eq "138"
82 |     end
83 | 
84 |     it "parses the image description" do
85 |       expect(@feed.image.description).to eq "The act of making love, tenderly."
86 |     end
87 | 
88 |     it "parses entries" do
89 |       expect(@feed.entries.size).to eq 10
90 |     end
91 |   end
92 | end
93 | 


--------------------------------------------------------------------------------
/spec/feedjira/feed_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Feed do
 6 |   describe ".add_common_feed_element" do
 7 |     before(:all) do
 8 |       described_class.add_common_feed_element("generator")
 9 |     end
10 | 
11 |     it "parses the added element out of Atom feeds" do
12 |       expect(Feedjira.parse(sample_wfw_feed).generator).to eq "TypePad"
13 |     end
14 | 
15 |     it "parses the added element out of Atom Feedburner feeds" do
16 |       expect(Feedjira::Parser::Atom.new).to respond_to(:generator)
17 |     end
18 | 
19 |     it "parses the added element out of RSS feeds" do
20 |       expect(Feedjira::Parser::RSS.new).to respond_to(:generator)
21 |     end
22 |   end
23 | 
24 |   describe ".add_common_feed_elements" do
25 |     before do
26 |       described_class.add_common_feed_elements(:foos)
27 |     end
28 | 
29 |     it "parses the added element out of Atom feeds" do
30 |       expect(Feedjira.parse(sample_wfw_feed).foos).to eq []
31 |     end
32 | 
33 |     it "parses the added element out of Atom Feedburner feeds" do
34 |       expect(Feedjira::Parser::Atom.new).to respond_to(:foos)
35 |     end
36 | 
37 |     it "parses the added element out of RSS feeds" do
38 |       expect(Feedjira::Parser::RSS.new).to respond_to(:foos)
39 |     end
40 |   end
41 | 
42 |   describe ".add_common_feed_entry_element" do
43 |     before(:all) do
44 |       tag = "wfw:commentRss"
45 |       described_class.add_common_feed_entry_element tag, as: :comment_rss
46 |     end
47 | 
48 |     it "parses the added element out of Atom feeds entries" do
49 |       entry = Feedjira.parse(sample_wfw_feed).entries.first
50 |       expect(entry.comment_rss).to eq "this is the new val"
51 |     end
52 | 
53 |     it "parses the added element out of Atom Feedburner feeds entries" do
54 |       expect(Feedjira::Parser::AtomEntry.new).to respond_to(:comment_rss)
55 |     end
56 | 
57 |     it "parses the added element out of RSS feeds entries" do
58 |       expect(Feedjira::Parser::RSSEntry.new).to respond_to(:comment_rss)
59 |     end
60 |   end
61 | 
62 |   describe ".add_common_feed_entry_elements" do
63 |     before do
64 |       described_class.add_common_feed_entry_elements(:things)
65 |     end
66 | 
67 |     it "parses the added element out of Atom feeds entries" do
68 |       entry = Feedjira.parse(sample_wfw_feed).entries.first
69 |       expect(entry.things).to eq []
70 |     end
71 | 
72 |     it "parses the added element out of Atom Feedburner feeds entries" do
73 |       expect(Feedjira::Parser::AtomEntry.new).to respond_to(:things)
74 |     end
75 | 
76 |     it "parses the added element out of RSS feeds entries" do
77 |       expect(Feedjira::Parser::RSSEntry.new).to respond_to(:things)
78 |     end
79 |   end
80 | end
81 | 


--------------------------------------------------------------------------------
/lib/feedjira.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "zlib"
 4 | require "sax-machine"
 5 | require "loofah"
 6 | require "logger"
 7 | require "json"
 8 | 
 9 | require_relative "feedjira/util"
10 | require_relative "feedjira/util/parse_time"
11 | require_relative "feedjira/configuration"
12 | require_relative "feedjira/feed_entry_utilities"
13 | require_relative "feedjira/feed_utilities"
14 | require_relative "feedjira/feed"
15 | require_relative "feedjira/rss_entry_utilities"
16 | require_relative "feedjira/atom_entry_utilities"
17 | require_relative "feedjira/parser"
18 | require_relative "feedjira/parser/globally_unique_identifier"
19 | require_relative "feedjira/parser/rss_entry"
20 | require_relative "feedjira/parser/rss_image"
21 | require_relative "feedjira/parser/rss"
22 | require_relative "feedjira/parser/atom_entry"
23 | require_relative "feedjira/parser/atom"
24 | require_relative "feedjira/preprocessor"
25 | require_relative "feedjira/version"
26 | 
27 | require_relative "feedjira/parser/rss_feed_burner_entry"
28 | require_relative "feedjira/parser/rss_feed_burner"
29 | require_relative "feedjira/parser/podlove_chapter"
30 | require_relative "feedjira/parser/itunes_rss_owner"
31 | require_relative "feedjira/parser/itunes_rss_category"
32 | require_relative "feedjira/parser/itunes_rss_item"
33 | require_relative "feedjira/parser/itunes_rss"
34 | require_relative "feedjira/parser/atom_feed_burner_entry"
35 | require_relative "feedjira/parser/atom_feed_burner"
36 | require_relative "feedjira/parser/atom_google_alerts_entry"
37 | require_relative "feedjira/parser/atom_google_alerts"
38 | require_relative "feedjira/parser/google_docs_atom_entry"
39 | require_relative "feedjira/parser/google_docs_atom"
40 | require_relative "feedjira/parser/atom_youtube_entry"
41 | require_relative "feedjira/parser/atom_youtube"
42 | require_relative "feedjira/parser/json_feed"
43 | require_relative "feedjira/parser/json_feed_item"
44 | 
45 | # Feedjira
46 | module Feedjira
47 |   NoParserAvailable = Class.new(StandardError)
48 | 
49 |   extend Configuration
50 | 
51 |   # Parse XML with first compatible parser
52 |   #
53 |   # @example
54 |   #   xml = HTTParty.get("http://example.com").body
55 |   #   Feedjira.parse(xml)
56 |   def parse(xml, parser: nil, &block)
57 |     parser ||= parser_for_xml(xml)
58 | 
59 |     if parser.nil?
60 |       raise NoParserAvailable, "No valid parser for XML."
61 |     end
62 | 
63 |     parser.parse(xml, &block)
64 |   end
65 |   module_function :parse
66 | 
67 |   # Find compatible parser for given XML
68 |   #
69 |   # @example
70 |   #   xml = HTTParty.get("http://example.com").body
71 |   #   parser = Feedjira.parser_for_xml(xml)
72 |   #   parser.parse(xml)
73 |   def parser_for_xml(xml)
74 |     Feedjira.parsers.detect { |klass| klass.able_to_parse?(xml) }
75 |   end
76 |   module_function :parser_for_xml
77 | end
78 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/i_tunes_rss_item_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "spec_helper"
 4 | 
 5 | describe Feedjira::Parser::ITunesRSSItem do
 6 |   before do
 7 |     # I don't really like doing it this way because these unit test should only
 8 |     # rely on ITunesRssItem, but this is actually how it should work. You would
 9 |     # never just pass entry xml straight to the ITunesRssItem
10 |     @item = Feedjira::Parser::ITunesRSS.parse(sample_itunes_feed).entries.first
11 |   end
12 | 
13 |   it "parses the title" do
14 |     expect(@item.title).to eq "Shake Shake Shake Your Spices"
15 |   end
16 | 
17 |   it "parses the itunes title" do
18 |     expect(@item.itunes_title).to eq "Shake Shake Shake Your Spices"
19 |   end
20 | 
21 |   it "parses the author" do
22 |     expect(@item.itunes_author).to eq "John Doe"
23 |   end
24 | 
25 |   it "parses the subtitle" do
26 |     expect(@item.itunes_subtitle).to eq "A short primer on table spices"
27 |   end
28 | 
29 |   it "parses the summary" do
30 |     summary = "This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!"
31 |     expect(@item.itunes_summary).to eq summary
32 |   end
33 | 
34 |   it "parses the itunes season" do
35 |     expect(@item.itunes_season).to eq "1"
36 |   end
37 | 
38 |   it "parses the itunes episode number" do
39 |     expect(@item.itunes_episode).to eq "3"
40 |   end
41 | 
42 |   it "parses the itunes episode type" do
43 |     expect(@item.itunes_episode_type).to eq "full"
44 |   end
45 | 
46 |   it "parses the enclosure" do
47 |     expect(@item.enclosure_length).to eq "8727310"
48 |     expect(@item.enclosure_type).to eq "audio/x-m4a"
49 |     expect(@item.enclosure_url).to eq "http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a"
50 |   end
51 | 
52 |   it "parses the guid as id" do
53 |     expect(@item.id).to eq "http://example.com/podcasts/archive/aae20050615.m4a"
54 |   end
55 | 
56 |   it "parses the published date" do
57 |     published = Feedjira::Util::ParseTime.call "Wed Jun 15 19:00:00 UTC 2005"
58 |     expect(@item.published).to eq published
59 |   end
60 | 
61 |   it "parses the duration" do
62 |     expect(@item.itunes_duration).to eq "7:04"
63 |   end
64 | 
65 |   it "parses the keywords" do
66 |     expect(@item.itunes_keywords).to eq "salt, pepper, shaker, exciting"
67 |   end
68 | 
69 |   it "parses the image" do
70 |     expect(@item.itunes_image).to eq "http://example.com/podcasts/everything/AllAboutEverything.jpg"
71 |   end
72 | 
73 |   it "parses the order" do
74 |     expect(@item.itunes_order).to eq "12"
75 |   end
76 | 
77 |   it "parses the closed captioned flag" do
78 |     expect(@item.itunes_closed_captioned).to eq "yes"
79 |   end
80 | 
81 |   it "parses the encoded content" do
82 |     content = "<p><strong>TOPIC</strong>: Gooseneck Options</p>"
83 |     expect(@item.content).to eq content
84 |   end
85 | end
86 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/a10.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <rss xmlns:a10="http://www.w3.org/2005/Atom" version="2.0">
 3 |   <channel>
 4 |     <title>Some Title</title>
 5 |     <description>Some Description</description>
 6 |     <lastBuildDate>Thu, 14 May 2020 10:00:18 Z</lastBuildDate>
 7 |     <category>Some Category</category>
 8 |     <a10:link href="http://www.example.com/" />
 9 |     <pubDate>Sat, 16 May 2020 08:50:40 GMT</pubDate>
10 |     <item>
11 |       <title>Title 5</title>
12 |       <description>Description 5</description>
13 |       <pubDate>Thu, 14 May 2020 10:00:18 Z</pubDate>
14 |       <a10:link href="http://www.example.com/5" />
15 |       <a10:author>
16 |         <a10:name>John Doe</a10:name>
17 |         <a10:uri>http://www.example.com/</a10:uri>
18 |         <a10:email>john.doe@example.com</a10:email>
19 |       </a10:author>
20 |       <a10:updated>2020-05-14T10:00:18Z</a10:updated>
21 |     </item>
22 |     <item>
23 |       <title>Title 4</title>
24 |       <description>Description 4</description>
25 |       <pubDate>Wed, 13 May 2020 10:17:57 Z</pubDate>
26 |       <a10:link href="http://www.example.com/4" />
27 |       <a10:author>
28 |         <a10:name>John Doe</a10:name>
29 |         <a10:uri>http://www.example.com/</a10:uri>
30 |         <a10:email>john.doe@example.com</a10:email>
31 |       </a10:author>
32 |       <a10:updated>2020-05-13T10:17:57Z</a10:updated>
33 |     </item>
34 |     <item>
35 |       <title>Title 3</title>
36 |       <description>Dfescription 3</description>
37 |       <pubDate>Tue, 12 May 2020 15:00:00 Z</pubDate>
38 |       <a10:link href="http://www.example.com/3" />
39 |       <a10:author>
40 |         <a10:name>John Doe</a10:name>
41 |         <a10:uri>http://www.example.com/</a10:uri>
42 |         <a10:email>john.doe@example.com</a10:email>
43 |       </a10:author>
44 |       <a10:updated>2020-05-12T15:00:00Z</a10:updated>
45 |     </item>
46 |     <item>
47 |       <title>Title 2</title>
48 |       <description>Description 2</description>
49 |       <pubDate>Tue, 12 May 2020 07:52:36 Z</pubDate>
50 |       <a10:link href="http://www.example.com/2" />
51 |       <a10:author>
52 |         <a10:name>John Doe</a10:name>
53 |         <a10:uri>http://www.example.com/</a10:uri>
54 |         <a10:email>john.doe@example.com</a10:email>
55 |       </a10:author>
56 |       <a10:updated>2020-05-12T07:52:36Z</a10:updated>
57 |     </item>
58 |     <item>
59 |       <title>Title 1</title>
60 |       <description>Description 1</description>
61 |       <pubDate>Thu, 07 May 2020 07:36:53 Z</pubDate>
62 |       <a10:link href="http://www.example.com/1" />
63 |       <a10:author>
64 |         <a10:name>John Doe</a10:name>
65 |         <a10:uri>http://www.example.com/</a10:uri>
66 |         <a10:email>john.doe@example.com</a10:email>
67 |       </a10:author>
68 |       <a10:updated>2020-05-07T07:36:53Z</a10:updated>
69 |     </item>
70 |   </channel>
71 | </rss>
72 | 
73 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/atom_youtube_entry_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
 4 | 
 5 | describe Feedjira::Parser::AtomYoutubeEntry do
 6 |   describe "parsing" do
 7 |     before do
 8 |       @feed = Feedjira::Parser::AtomYoutube.parse(sample_youtube_atom_feed)
 9 |       @entry = @feed.entries.first
10 |     end
11 | 
12 |     it "has the title" do
13 |       expect(@entry.title).to eq "The Google app: Questions Title"
14 |     end
15 | 
16 |     it "has the url" do
17 |       expect(@entry.url).to eq "http://www.youtube.com/watch?v=5shykyfmb28"
18 |     end
19 | 
20 |     it "has the entry id" do
21 |       expect(@entry.entry_id).to eq "yt:video:5shykyfmb28"
22 |     end
23 | 
24 |     it "has the published date" do
25 |       expect(@entry.published).to eq Feedjira::Util::ParseTime.call("2015-05-04T00:01:27+00:00")
26 |     end
27 | 
28 |     it "has the updated date" do
29 |       expect(@entry.updated).to eq Feedjira::Util::ParseTime.call("2015-05-13T17:38:30+00:00")
30 |     end
31 | 
32 |     it "has the content populated from the media:description element" do
33 |       expect(@entry.content).to eq "A question is the most powerful force in the world. It can start you on an adventure or spark a connection. See where a question can take you. The Google app is available on iOS and Android. Download the app here: http://www.google.com/search/about/download"
34 |     end
35 | 
36 |     it "has the summary but blank" do
37 |       expect(@entry.summary).to be_nil
38 |     end
39 | 
40 |     it "has the custom youtube video id" do
41 |       expect(@entry.youtube_video_id).to eq "5shykyfmb28"
42 |     end
43 | 
44 |     it "has the custom media title" do
45 |       expect(@entry.media_title).to eq "The Google app: Questions"
46 |     end
47 | 
48 |     it "has the custom media url" do
49 |       expect(@entry.media_url).to eq "https://www.youtube.com/v/5shykyfmb28?version=3"
50 |     end
51 | 
52 |     it "has the custom media type" do
53 |       expect(@entry.media_type).to eq "application/x-shockwave-flash"
54 |     end
55 | 
56 |     it "has the custom media width" do
57 |       expect(@entry.media_width).to eq "640"
58 |     end
59 | 
60 |     it "has the custom media height" do
61 |       expect(@entry.media_height).to eq "390"
62 |     end
63 | 
64 |     it "has the custom media thumbnail url" do
65 |       expect(@entry.media_thumbnail_url).to eq "https://i2.ytimg.com/vi/5shykyfmb28/hqdefault.jpg"
66 |     end
67 | 
68 |     it "has the custom media thumbnail width" do
69 |       expect(@entry.media_thumbnail_width).to eq "480"
70 |     end
71 | 
72 |     it "has the custom media thumbnail height" do
73 |       expect(@entry.media_thumbnail_height).to eq "360"
74 |     end
75 | 
76 |     it "has the custom media star count" do
77 |       expect(@entry.media_star_count).to eq "3546"
78 |     end
79 | 
80 |     it "has the custom media star average" do
81 |       expect(@entry.media_star_average).to eq "4.79"
82 |     end
83 | 
84 |     it "has the custom media views" do
85 |       expect(@entry.media_views).to eq "251497"
86 |     end
87 |   end
88 | end
89 | 


--------------------------------------------------------------------------------
/lib/feedjira/feed_utilities.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | module Feedjira
  4 |   module FeedUtilities
  5 |     UPDATABLE_ATTRIBUTES = %w[title feed_url url last_modified etag].freeze
  6 | 
  7 |     attr_writer   :new_entries, :updated, :last_modified
  8 |     attr_accessor :etag
  9 | 
 10 |     def self.included(base)
 11 |       base.extend ClassMethods
 12 |     end
 13 | 
 14 |     module ClassMethods
 15 |       def parse(xml, &)
 16 |         xml = strip_whitespace(xml)
 17 |         xml = preprocess(xml) if preprocess_xml
 18 |         super(xml, &)
 19 |       end
 20 | 
 21 |       def preprocess(xml)
 22 |         # noop
 23 |         xml
 24 |       end
 25 | 
 26 |       def preprocess_xml=(value)
 27 |         @preprocess_xml = value
 28 |       end
 29 | 
 30 |       def preprocess_xml
 31 |         @preprocess_xml
 32 |       end
 33 | 
 34 |       def strip_whitespace(xml)
 35 |         if Feedjira.strip_whitespace
 36 |           xml.strip
 37 |         else
 38 |           xml.lstrip
 39 |         end
 40 |       end
 41 |     end
 42 | 
 43 |     def last_modified
 44 |       @last_modified ||= entries.reject { |e| e.published.nil? }.max_by(&:published)&.published
 45 |     end
 46 | 
 47 |     def updated?
 48 |       @updated || false
 49 |     end
 50 | 
 51 |     def new_entries
 52 |       @new_entries ||= []
 53 |     end
 54 | 
 55 |     def new_entries?
 56 |       !new_entries.empty?
 57 |     end
 58 | 
 59 |     def update_from_feed(feed)
 60 |       self.new_entries += find_new_entries_for(feed)
 61 |       entries.unshift(*self.new_entries)
 62 | 
 63 |       @updated = false
 64 | 
 65 |       UPDATABLE_ATTRIBUTES.each do |name|
 66 |         @updated ||= update_attribute(feed, name)
 67 |       end
 68 |     end
 69 | 
 70 |     def update_attribute(feed, name)
 71 |       old_value = send(name)
 72 |       new_value = feed.send(name)
 73 | 
 74 |       if old_value == new_value
 75 |         false
 76 |       else
 77 |         send(:"#{name}=", new_value)
 78 |         true
 79 |       end
 80 |     end
 81 | 
 82 |     def sanitize_entries!
 83 |       entries.each(&:sanitize!)
 84 |     end
 85 | 
 86 |     private
 87 | 
 88 |     # This implementation is a hack, which is why it's so ugly. It's to get
 89 |     # around the fact that not all feeds have a published date. However,
 90 |     # they're always ordered with the newest one first. So we go through the
 91 |     # entries just parsed and insert each one as a new entry until we get to
 92 |     # one that has the same id as the the newest for the feed.
 93 |     def find_new_entries_for(feed)
 94 |       return feed.entries if entries.empty?
 95 | 
 96 |       latest_entry = entries.first
 97 |       found_new_entries = []
 98 | 
 99 |       feed.entries.each do |entry|
100 |         break unless new_entry?(entry, latest_entry)
101 | 
102 |         found_new_entries << entry
103 |       end
104 | 
105 |       found_new_entries
106 |     end
107 | 
108 |     def new_entry?(entry, latest)
109 |       nil_ids = entry.entry_id.nil? && latest.entry_id.nil?
110 |       new_id = entry.entry_id != latest.entry_id
111 |       new_url = entry.url != latest.url
112 | 
113 |       (nil_ids || new_id) && new_url
114 |     end
115 |   end
116 | end
117 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/AmazonWebServicesBlogFirstEntryContent.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | <div xmlns="http://www.w3.org/1999/xhtml"><p>
 3 |   Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle.
 4 | 
 5 |   We talked about his plans to use AWS as part of his new social video
 6 |   portal startup. I won't spill any beans before he's ready to talk
 7 |   about it himself, but I will say that he has a really good concept,
 8 |   strong backers, and infectious enthusiasm for the online world.</p>
 9 | </p>
10 | 
11 | <p>
12 |   He's now ready to hire a software architect and designer in order to
13 |   bring his vision to life. I've posted the job below; you can 
14 |   send your resume to <a href="mailto:apply@web.tv">apply@web.tv</a>
15 |   if you are interested, qualified, and located in the right part
16 |   of the world.
17 | </p>
18 | 
19 | <div style="padding:10px;margin-left:20px;margin-right:20px;margin-bottom:10px;border:1px dotted black;background-color:#e0e0e0;">
20 | 
21 | <div style="text-align:center;font-size:120%;font-weight:bold;">Software Architect & Designer</div>
22 | 
23 | <p>
24 |   We are a reputable Internet technology, software services and e-commerce company based
25 |   in
26 |   <a href="http://en.wikipedia.org/wiki/Istanbul">Istanbul</a> and
27 |   <a href="http://en.wikipedia.org/wiki/Bursa">Bursa</a>,
28 |   <a href="http://en.wikipedia.org/wiki/Turkey">Turkey</a>.
29 |   We are looking for a talented Software Architect who will
30 |   be working in Istanbul for a certain period of time, for our new global scale
31 |   "social video portal" project. Below are the qualifications required and job
32 |   description for the position to be held. 
33 | <p>
34 | 
35 | <style>
36 |   .li20090116 {padding-bottom:6px;}
37 | </style>
38 | 
39 | <p>Qualifications:</p>
40 | 
41 | <ul>
42 |   <li class="li20090116" >Extensive knowledge of web technologies.</li>
43 |   <li class="li20090116">Experienced in web based application design and development.</li>
44 |   <li class="li20090116">Solid bacground in object oriented design and development.</li>
45 |   <li class="li20090116">Preferrably experienced in live broadcasting over the internet, video streaming, video sharing and social networking web site development and design.</li>
46 |   <li class="li20090116">Knowledge and experience of design and development of multi-tier, distributed, massively multi-user systems.</li>
47 |   <li class="li20090116">Experienced in Cloud Computing applications (preferably with AWS).</li>
48 |   <li class="li20090116">Very good command of PHP or Python.</li>
49 |   <li class="li20090116">Experinced in relational database design.</li>
50 |   <li class="li20090116">Familarity with Erlang, and knowledge or experience of Java, C/C++, Ajax, Adobe Flex, mySQL is a plus.</li>
51 |   <li class="li20090116">Self motivated, enthusiastic, team player.</li>
52 | </ul>
53 | 
54 | <p>Job Description:</p>
55 | <ul>
56 |   <li class="li20090116">Will be mainly responsible for designing the overall system for a multi-tier, massively multi-user live video multi-casting, videosharing web site which will also have features of a social network.</li>
57 |   <li class="li20090116">Will be involved in Design and Development phases of software development cycle. Will contribute to the Analysis phase.</li>
58 |   <li class="li20090116">Will lead the Software Development Team for the period of the contract and report to the Project Coordinator.</li>
59 | </ul>
60 | </div>
61 | 
62 | <p>-- Jeff;</p>
63 | </div>
64 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/ITunesWithSpacesInAttributes.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | <?xml version="1.0" encoding="UTF-8"?>
 3 | <rss 
 4 |   xmlns:itunes = "http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0"
 5 |   >
 6 |  
 7 | <channel>
 8 | <title>All About Everything</title>
 9 | <link>http://www.example.com/podcasts/everything/index.html</link>
10 | <language>en-us</language>
11 | <copyright>&#x2117; &amp; &#xA9; 2005 John Doe &amp; Family</copyright>
12 | <itunes:subtitle>A show about everything</itunes:subtitle>
13 | <itunes:author>John Doe</itunes:author>
14 | <itunes:summary>All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store</itunes:summary>
15 | <description>All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store</description>
16 | <itunes:owner>
17 | <itunes:name>John Doe</itunes:name>
18 | <itunes:email>john.doe@example.com</itunes:email>
19 | </itunes:owner>
20 | <itunes:image href="http://example.com/podcasts/everything/AllAboutEverything.jpg" />
21 | <itunes:category text="Technology">
22 | <itunes:category text="Gadgets"/>
23 | </itunes:category>
24 | <itunes:category text="TV &amp; Film"/>
25 |  
26 | <item>
27 | <title>Shake Shake Shake Your Spices</title>
28 | <itunes:author>John Doe</itunes:author>
29 | <itunes:subtitle>A short primer on table spices</itunes:subtitle>
30 | <itunes:summary>This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!</itunes:summary>
31 | <enclosure url="http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a" length="8727310" type="audio/x-m4a" />
32 | <guid>http://example.com/podcasts/archive/aae20050615.m4a</guid>
33 | <pubDate>Wed, 15 Jun 2005 19:00:00 GMT</pubDate>
34 | <itunes:duration>7:04</itunes:duration>
35 | <itunes:keywords>salt, pepper, shaker, exciting</itunes:keywords>
36 | </item>
37 |  
38 | <item>
39 | <title>Socket Wrench Shootout</title>
40 | <itunes:author>Jane Doe</itunes:author>
41 | <itunes:subtitle>Comparing socket wrenches is fun!</itunes:subtitle>
42 | <itunes:summary>This week we talk about metric vs. old english socket wrenches. Which one is better? Do you really need both? Get all of your answers here.</itunes:summary>
43 | <enclosure url="http://example.com/podcasts/everything/AllAboutEverythingEpisode2.mp3" length="5650889" type="audio/mpeg" />
44 | <guid>http://example.com/podcasts/archive/aae20050608.mp3</guid>
45 | <pubDate>Wed, 8 Jun 2005 19:00:00 GMT</pubDate>
46 | <itunes:duration>4:34</itunes:duration>
47 | <itunes:keywords>metric, socket, wrenches, tool</itunes:keywords>
48 | </item>
49 |  
50 | <item>
51 | <title>Red, Whine, &amp; Blue</title>
52 | <itunes:author>Various</itunes:author>
53 | <itunes:subtitle>Red + Blue != Purple</itunes:subtitle>
54 | <itunes:summary>This week we talk about surviving in a Red state if you are a Blue person. Or vice versa.</itunes:summary>
55 | <enclosure url="http://example.com/podcasts/everything/AllAboutEverythingEpisode1.mp3" length="4989537" type="audio/mpeg" />
56 | <guid>http://example.com/podcasts/archive/aae20050601.mp3</guid>
57 | <pubDate>Wed, 1 Jun 2005 19:00:00 GMT</pubDate>
58 | <itunes:duration>3:59</itunes:duration>
59 | <itunes:keywords>politics, red, blue, state</itunes:keywords>
60 | </item>
61 |  
62 | </channel>
63 | </rss>
64 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, gender identity and expression, level of experience,
 9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at mikeastock@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at [http://contributor-covenant.org/version/1/4][version]
72 | 
73 | [homepage]: http://contributor-covenant.org
74 | [version]: http://contributor-covenant.org/version/1/4/
75 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/AtomFeedWithSpacesAroundEquals.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 3 | <!--
 4 | This atom/xml feed is an index to active advisories, watches and warnings 
 5 | issued by the National Weather Service.  This index file is not the complete 
 6 | Common Alerting Protocol (CAP) alert message.  To obtain the complete CAP 
 7 | alert, please follow the links for each entry in this index.  Also note the 
 8 | CAP message uses a style sheet to convey the information in a human readable 
 9 | format.  Please view the source of the CAP message to see the complete data 
10 | set.  Not all information in the CAP message is contained in this index of 
11 | active alerts.
12 | -->
13 | <feed 
14 |   xmlns = "http://www.w3.org/2005/Atom" 
15 |   xmlns:cap = "urn:oasis:names:tc:emergency:cap:1.1" 
16 |   xmlns:ha = "http://www.alerting.net/namespace/index_1.0"
17 |   >
18 |   <!-- TZN = <EST> -->
19 |   <!-- TZO = <-5> -->
20 |   <!-- http-date = Wed, 07 Nov 2012 02:17:00 GMT -->
21 |   <id>http://alerts.weather.gov/cap/wwaatmget.php?x=MEC015&amp;y=0</id>
22 |   <generator>NWS CAP Server</generator>
23 |   <updated>2012-11-07T09:17:00-05:00</updated>
24 |   <author>
25 |     <name>w-nws.webmaster@noaa.gov</name>
26 |   </author>
27 |   <title>Current Watches, Warnings and Advisories for Lincoln (MEC015) Maine Issued by the National Weather Service</title>
28 |   <link href="http://alerts.weather.gov/cap/wwaatmget.php?x=MEC015&amp;y=0"/>
29 |   <entry>
30 |     <id>http://alerts.weather.gov/cap/wwacapget.php?x=ME124CCF70CDD4.WinterWeatherAdvisory.124CCF729F10ME.CARWSWCAR.b113b32cf3dd0946aab63451118d16e7</id>
31 |     <updated>2012-11-07T04:09:00-05:00</updated>
32 |     <published>2012-11-07T04:09:00-05:00</published>
33 |     <author>
34 |       <name>w-nws.webmaster@noaa.gov</name>
35 |     </author>
36 |     <title>Winter Weather Advisory issued November 07 at 4:09AM EST until November 08 at 12:00PM EST by NWS</title>
37 |     <link href="http://alerts.weather.gov/cap/wwacapget.php?x=ME124CCF70CDD4.WinterWeatherAdvisory.124CCF729F10ME.CARWSWCAR.b113b32cf3dd0946aab63451118d16e7"/>
38 |     <summary>...WINTER WEATHER ADVISORY IN EFFECT FROM 7 PM THIS EVENING TO NOON EST THURSDAY... THE NATIONAL WEATHER SERVICE IN CARIBOU HAS ISSUED A WINTER WEATHER ADVISORY FOR SNOW AND MIXED PRECIPITATION...WHICH IS IN EFFECT FROM 7 PM THIS EVENING TO NOON EST THURSDAY. * PRECIPITATION TYPE...SNOW...SLEET AND FREEZING RAIN</summary>
39 |     <cap:event>Winter Weather Advisory</cap:event>
40 |     <cap:effective>2012-11-07T04:09:00-05:00</cap:effective>
41 |     <cap:expires>2012-11-07T16:00:00-05:00</cap:expires>
42 |     <cap:status>Actual</cap:status>
43 |     <cap:msgType>Alert</cap:msgType>
44 |     <cap:category>Met</cap:category>
45 |     <cap:urgency>Expected</cap:urgency>
46 |     <cap:severity>Minor</cap:severity>
47 |     <cap:certainty>Likely</cap:certainty>
48 |     <cap:areaDesc>Central Penobscot; Central Washington; Interior Hancock; Northern Penobscot; Northern Washington; Southeast Aroostook; Southern Penobscot; Southern Piscataquis</cap:areaDesc>
49 |     <cap:polygon/>
50 |     <cap:geocode>
51 |       <valueName>FIPS6</valueName>
52 |       <value>023003 023009 023019 023021 023029</value>
53 |       <valueName>UGC</valueName>
54 |       <value>MEZ005 MEZ006 MEZ011 MEZ015 MEZ016 MEZ017 MEZ031 MEZ032</value>
55 |     </cap:geocode>
56 |     <cap:parameter>
57 |       <valueName>VTEC</valueName>
58 |       <value>/O.NEW.KCAR.WW.Y.0024.121108T0000Z-121108T1700Z/</value>
59 |     </cap:parameter>
60 |   </entry>
61 | </feed>
62 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/atom_entry_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | describe Feedjira::Parser::AtomEntry do
  6 |   before do
  7 |     # I don't really like doing it this way because these unit test should only
  8 |     # rely on AtomEntry, but this is actually how it should work. You would
  9 |     # never just pass entry xml straight to the AtomEnry
 10 |     @entry = Feedjira::Parser::Atom.parse(sample_atom_feed).entries.first
 11 |   end
 12 | 
 13 |   it "parses the title" do
 14 |     title = "AWS Job: Architect & Designer Position in Turkey"
 15 |     expect(@entry.title).to eq title
 16 |   end
 17 | 
 18 |   it "parses the url" do
 19 |     expect(@entry.url).to eq "http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html"
 20 |   end
 21 | 
 22 |   it "parses the url even when" do
 23 |     xml = load_sample("atom_with_link_tag_for_url_unmarked.xml")
 24 |     entries = Feedjira::Parser::Atom.parse(xml).entries
 25 |     expect(entries.first.url).to eq "http://www.innoq.com/blog/phaus/2009/07/ja.html"
 26 |   end
 27 | 
 28 |   it "parses the author" do
 29 |     expect(@entry.author).to eq "AWS Editor"
 30 |   end
 31 | 
 32 |   it "parses the content" do
 33 |     expect(@entry.content).to eq sample_atom_entry_content
 34 |   end
 35 | 
 36 |   it "provides a summary" do
 37 |     summary = "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
 38 |     expect(@entry.summary).to eq summary
 39 |   end
 40 | 
 41 |   it "parses the published date" do
 42 |     published = Feedjira::Util::ParseTime.call "Fri Jan 16 18:21:00 UTC 2009"
 43 |     expect(@entry.published).to eq published
 44 |   end
 45 | 
 46 |   it "parses the categories" do
 47 |     expect(@entry.categories).to eq %w[Turkey Seattle]
 48 |   end
 49 | 
 50 |   it "parses the updated date" do
 51 |     updated = Feedjira::Util::ParseTime.call "Fri Jan 16 18:21:00 UTC 2009"
 52 |     expect(@entry.updated).to eq updated
 53 |   end
 54 | 
 55 |   it "parses the id" do
 56 |     expect(@entry.id).to eq "tag:typepad.com,2003:post-61484736"
 57 |   end
 58 | 
 59 |   it "supports each" do
 60 |     expect(@entry).to respond_to :each
 61 |   end
 62 | 
 63 |   it "is able to list out all fields with each" do
 64 |     all_fields = []
 65 |     title_value = ""
 66 | 
 67 |     @entry.each do |field, value|
 68 |       all_fields << field
 69 |       title_value = value if field == "title"
 70 |     end
 71 | 
 72 |     expect(title_value).to eq "AWS Job: Architect & Designer Position in Turkey"
 73 | 
 74 |     expected_fields = %w[
 75 |       author
 76 |       categories
 77 |       content
 78 |       entry_id
 79 |       links
 80 |       published
 81 |       summary
 82 |       title
 83 |       title_type
 84 |       updated
 85 |       url
 86 |     ]
 87 |     expect(all_fields.sort).to eq expected_fields
 88 |   end
 89 | 
 90 |   it "supports checking if a field exists in the entry" do
 91 |     expect(@entry).to include "author"
 92 |     expect(@entry).to include "title"
 93 |   end
 94 | 
 95 |   it "allows access to fields with hash syntax" do
 96 |     title = "AWS Job: Architect & Designer Position in Turkey"
 97 |     expect(@entry["title"]).to eq title
 98 |     expect(@entry["author"]).to eq "AWS Editor"
 99 |   end
100 | 
101 |   it "allows setting field values with hash syntax" do
102 |     @entry["title"] = "Foobar"
103 |     expect(@entry.title).to eq "Foobar"
104 |   end
105 | end
106 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/rss_feed_burner_entry_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | describe Feedjira::Parser::RSSFeedBurnerEntry do
  6 |   before do
  7 |     tag = "wfw:commentRss"
  8 |     Feedjira::Feed.add_common_feed_entry_element(tag, as: :comment_rss)
  9 |     # I don't really like doing it this way because these unit test should only
 10 |     # rely on RSSEntry, but this is actually how it should work. You would
 11 |     # never just pass entry xml straight to the AtomEnry
 12 |     feed = Feedjira::Parser::RSSFeedBurner.parse sample_rss_feed_burner_feed
 13 |     @entry = feed.entries.first
 14 |   end
 15 | 
 16 |   after do
 17 |     # We change the title in one or more specs to test []=
 18 |     if @entry.title != "Angie’s List Sets Price Range IPO At $11 To $13 Per Share; Valued At Over $600M"
 19 |       feed = Feedjira::Parser::RSS.parse sample_rss_feed_burner_feed
 20 |       @entry.title = feed.entries.first.title
 21 |     end
 22 |   end
 23 | 
 24 |   it "parses the title" do
 25 |     title = "Angie’s List Sets Price Range IPO At $11 To $13 Per Share; Valued At Over $600M"
 26 |     expect(@entry.title).to eq title
 27 |   end
 28 | 
 29 |   it "parses the original url" do
 30 |     expect(@entry.url).to eq "http://techcrunch.com/2011/11/02/angies-list-prices-ipo-at-11-to-13-per-share-valued-at-over-600m/"
 31 |   end
 32 | 
 33 |   it "parses the author" do
 34 |     expect(@entry.author).to eq "Leena Rao"
 35 |   end
 36 | 
 37 |   it "parses the content" do
 38 |     expect(@entry.content).to eq sample_rss_feed_burner_entry_content
 39 |   end
 40 | 
 41 |   it "provides a summary" do
 42 |     expect(@entry.summary).to eq sample_rss_feed_burner_entry_description
 43 |   end
 44 | 
 45 |   it "parses the published date" do
 46 |     published = Feedjira::Util::ParseTime.call "Wed Nov 02 17:25:27 UTC 2011"
 47 |     expect(@entry.published).to eq published
 48 |   end
 49 | 
 50 |   it "parses the categories" do
 51 |     expect(@entry.categories).to eq ["TC", "angie\\'s list"]
 52 |   end
 53 | 
 54 |   it "parses the guid as id" do
 55 |     expect(@entry.id).to eq "http://techcrunch.com/?p=446154"
 56 |   end
 57 | 
 58 |   it "supports each" do
 59 |     expect(@entry).to respond_to :each
 60 |   end
 61 | 
 62 |   it "is able to list out all fields with each" do
 63 |     all_fields = []
 64 |     title_value = ""
 65 | 
 66 |     @entry.each do |field, value|
 67 |       all_fields << field
 68 |       title_value = value if field == "title"
 69 |     end
 70 | 
 71 |     title = "Angie’s List Sets Price Range IPO At $11 To $13 Per Share; Valued At Over $600M"
 72 |     expect(title_value).to eq title
 73 | 
 74 |     expected_fields = %w[
 75 |       author
 76 |       categories
 77 |       comment_rss
 78 |       comments
 79 |       content
 80 |       entry_id
 81 |       image
 82 |       published
 83 |       summary
 84 |       title
 85 |       url
 86 |     ]
 87 |     expect(all_fields.sort).to eq expected_fields
 88 |   end
 89 | 
 90 |   it "supports checking if a field exists in the entry" do
 91 |     expect(@entry).to include "author"
 92 |     expect(@entry).to include "title"
 93 |   end
 94 | 
 95 |   it "allows access to fields with hash syntax" do
 96 |     expect(@entry["author"]).to eq "Leena Rao"
 97 |     title = "Angie’s List Sets Price Range IPO At $11 To $13 Per Share; Valued At Over $600M"
 98 |     expect(@entry["title"]).to eq title
 99 |   end
100 | 
101 |   it "allows setting field values with hash syntax" do
102 |     @entry["title"] = "Foobar"
103 |     expect(@entry.title).to eq "Foobar"
104 |   end
105 | end
106 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | <?xml version='1.0' encoding='UTF-8'?>
 3 | <rss xmlns:itunes='http://www.itunes.com/dtds/podcast-1.0.dtd' version='2.0'>
 4 | 
 5 | <channel>
 6 | <title>All About Everything</title>
 7 | <link>http://www.example.com/podcasts/everything/index.html</link>
 8 | <language>en-us</language>
 9 | <copyright>&#x2117; &amp; &#xA9; 2005 John Doe &amp; Family</copyright>
10 | <itunes:subtitle>A show about everything</itunes:subtitle>
11 | <itunes:new-feed-url>http://example.com/new.xml</itunes:new-feed-url>
12 | <itunes:author>John Doe</itunes:author>
13 | <itunes:summary>All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store</itunes:summary>
14 | <description>All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store</description>
15 | <itunes:owner>
16 | <itunes:name>John Doe</itunes:name>
17 | <itunes:email>john.doe@example.com</itunes:email>
18 | </itunes:owner>
19 | <itunes:image href='http://example.com/podcasts/everything/AllAboutEverything.jpg' />
20 | <itunes:category text='Technology'>
21 | <itunes:category text='Gadgets'/>
22 | <itunes:category text='TV &amp; Film'/>
23 | 
24 | <item>
25 | <title>Shake Shake Shake Your Spices</title>
26 | <itunes:author>John Doe</itunes:author>
27 | <itunes:subtitle>A short primer on table spices</itunes:subtitle>
28 | <itunes:summary>This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!</itunes:summary>
29 | <enclosure url='http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a' length='8727310' type='audio/x-m4a' />
30 | <guid>http://example.com/podcasts/archive/aae20050615.m4a</guid>
31 | <pubDate>Wed, 15 Jun 2005 19:00:00 GMT</pubDate>
32 | <itunes:duration>7:04</itunes:duration>
33 | <itunes:keywords>salt, pepper, shaker, exciting</itunes:keywords>
34 | <itunes:image href='http://example.com/podcasts/everything/AllAboutEverything.jpg' />
35 | <itunes:order>12</itunes:order>
36 | <itunes:isClosedCaptioned>yes</itunes:isClosedCaptioned>
37 | <content:encoded>&lt;p&gt;&lt;strong&gt;TOPIC&lt;/strong&gt;: Gooseneck Options&lt;/p&gt;</content:encoded>
38 | </item>
39 | 
40 | <item>
41 | <title>Socket Wrench Shootout</title>
42 | <itunes:author>Jane Doe</itunes:author>
43 | <itunes:subtitle>Comparing socket wrenches is fun!</itunes:subtitle>
44 | <itunes:summary>This week we talk about metric vs. old english socket wrenches. Which one is better? Do you really need both? Get all of your answers here.</itunes:summary>
45 | <enclosure url='http://example.com/podcasts/everything/AllAboutEverythingEpisode2.mp3' length='5650889' type='audio/mpeg' />
46 | <guid>http://example.com/podcasts/archive/aae20050608.mp3</guid>
47 | <pubDate>Wed, 8 Jun 2005 19:00:00 GMT</pubDate>
48 | <itunes:duration>4:34</itunes:duration>
49 | <itunes:keywords>metric, socket, wrenches, tool</itunes:keywords>
50 | <itunes:image href='http://example.com/podcasts/everything/AllAboutEverything.jpg' />
51 | </item>
52 | 
53 | <item>
54 | <title>Red, Whine, &amp; Blue</title>
55 | <itunes:author>Various</itunes:author>
56 | <itunes:subtitle>Red + Blue != Purple</itunes:subtitle>
57 | <itunes:summary>This week we talk about surviving in a Red state if you are a Blue person. Or vice versa.</itunes:summary>
58 | <enclosure url='http://example.com/podcasts/everything/AllAboutEverythingEpisode1.mp3' length='4989537' type='audio/mpeg' />
59 | <guid>http://example.com/podcasts/archive/aae20050601.mp3</guid>
60 | <pubDate>Wed, 1 Jun 2005 19:00:00 GMT</pubDate>
61 | <itunes:duration>3:59</itunes:duration>
62 | <itunes:keywords>politics, red, blue, state</itunes:keywords>
63 | <itunes:image href='http://example.com/podcasts/everything/AllAboutEverything.jpg' />
64 | </item>
65 | 
66 | </channel>
67 | </rss>
68 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/itunes_rss_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | module Feedjira
  6 |   module Parser
  7 |     describe "#will_parse?" do
  8 |       it "returns true for an itunes RSS feed" do
  9 |         expect(ITunesRSS).to be_able_to_parse(sample_itunes_feed)
 10 |       end
 11 | 
 12 |       it "returns true for an itunes RSS feed with spaces between attribute names, equals sign, and values" do
 13 |         expect(ITunesRSS).to be_able_to_parse(sample_itunes_feed_with_spaces)
 14 |       end
 15 | 
 16 |       it "returns true for an itunes RSS feed with single-quoted attributes" do
 17 |         expect(ITunesRSS).to be_able_to_parse(sample_itunes_feed_with_single_quotes)
 18 |       end
 19 | 
 20 |       it "returns fase for an atom feed" do
 21 |         expect(ITunesRSS).not_to be_able_to_parse(sample_atom_feed)
 22 |       end
 23 | 
 24 |       it "returns false for an rss feedburner feed" do
 25 |         expect(ITunesRSS).not_to be_able_to_parse(sample_rss_feed_burner_feed)
 26 |       end
 27 |     end
 28 | 
 29 |     describe "parsing" do
 30 |       before do
 31 |         @feed = ITunesRSS.parse(sample_itunes_feed)
 32 |       end
 33 | 
 34 |       it "parses the ttl" do
 35 |         expect(@feed.ttl).to eq "60"
 36 |       end
 37 | 
 38 |       it "parses the last build date" do
 39 |         expect(@feed.last_built).to eq "Sat, 07 Sep 2002 09:42:31 GMT"
 40 |       end
 41 | 
 42 |       it "parses the subtitle" do
 43 |         expect(@feed.itunes_subtitle).to eq "A show about everything"
 44 |       end
 45 | 
 46 |       it "parses the author" do
 47 |         expect(@feed.itunes_author).to eq "John Doe"
 48 |       end
 49 | 
 50 |       it "parses an owner" do
 51 |         expect(@feed.itunes_owners.size).to eq 1
 52 |       end
 53 | 
 54 |       it "parses an image" do
 55 |         expect(@feed.itunes_image).to eq "http://example.com/podcasts/everything/AllAboutEverything.jpg"
 56 |       end
 57 | 
 58 |       it "parses the image url" do
 59 |         expect(@feed.image.url).to eq "http://example.com/podcasts/everything/AllAboutEverything.jpg"
 60 |       end
 61 | 
 62 |       it "parses the image title" do
 63 |         expect(@feed.image.title).to eq "All About Everything"
 64 |       end
 65 | 
 66 |       it "parses the image link" do
 67 |         expect(@feed.image.link).to eq "http://www.example.com/podcasts/everything/index.html"
 68 |       end
 69 | 
 70 |       it "parses the image width" do
 71 |         expect(@feed.image.width).to eq "88"
 72 |       end
 73 | 
 74 |       it "parses the image height" do
 75 |         expect(@feed.image.height).to eq "31"
 76 |       end
 77 | 
 78 |       it "parses the image description" do
 79 |         description = "All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store"
 80 |         expect(@feed.image.description).to eq description
 81 |       end
 82 | 
 83 |       it "parses categories" do
 84 |         expect(@feed.itunes_categories).to eq [
 85 |           "Technology",
 86 |           "Gadgets",
 87 |           "TV & Film",
 88 |           "Arts",
 89 |           "Design",
 90 |           "Food"
 91 |         ]
 92 | 
 93 |         expect(@feed.itunes_category_paths).to eq [
 94 |           %w[Technology Gadgets],
 95 |           ["TV & Film"],
 96 |           %w[Arts Design],
 97 |           %w[Arts Food]
 98 |         ]
 99 |       end
100 | 
101 |       it "parses the itunes type" do
102 |         expect(@feed.itunes_type).to eq "episodic"
103 |       end
104 | 
105 |       it "parses the summary" do
106 |         summary = "All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store"
107 |         expect(@feed.itunes_summary).to eq summary
108 |       end
109 | 
110 |       it "parses the complete tag" do
111 |         expect(@feed.itunes_complete).to eq "yes"
112 |       end
113 | 
114 |       it "parses entries" do
115 |         expect(@feed.entries.size).to eq 3
116 |       end
117 | 
118 |       it "parses the new-feed-url" do
119 |         expect(@feed.itunes_new_feed_url).to eq "http://example.com/new.xml"
120 |       end
121 |     end
122 |   end
123 | end
124 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/atom_feed_burner_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | module Feedjira
  6 |   module Parser
  7 |     describe "#will_parse?" do
  8 |       it "returns true for a feedburner atom feed" do
  9 |         expect(AtomFeedBurner).to be_able_to_parse(sample_feedburner_atom_feed)
 10 |       end
 11 | 
 12 |       it "returns false for an rdf feed" do
 13 |         expect(AtomFeedBurner).not_to be_able_to_parse(sample_rdf_feed)
 14 |       end
 15 | 
 16 |       it "returns false for a regular atom feed" do
 17 |         expect(AtomFeedBurner).not_to be_able_to_parse(sample_atom_feed)
 18 |       end
 19 | 
 20 |       it "returns false for an rss feedburner feed" do
 21 |         expect(AtomFeedBurner).not_to be_able_to_parse sample_rss_feed_burner_feed
 22 |       end
 23 |     end
 24 | 
 25 |     describe "parsing old style feeds" do
 26 |       before do
 27 |         @feed = AtomFeedBurner.parse(sample_feedburner_atom_feed)
 28 |       end
 29 | 
 30 |       it "parses the title" do
 31 |         expect(@feed.title).to eq "Paul Dix Explains Nothing"
 32 |       end
 33 | 
 34 |       it "parses the description" do
 35 |         description = "Entrepreneurship, programming, software development, politics, NYC, and random thoughts."
 36 |         expect(@feed.description).to eq description
 37 |       end
 38 | 
 39 |       it "parses the url" do
 40 |         expect(@feed.url).to eq "http://www.pauldix.net/"
 41 |       end
 42 | 
 43 |       it "parses the feed_url" do
 44 |         expect(@feed.feed_url).to eq "http://feeds.feedburner.com/PaulDixExplainsNothing"
 45 |       end
 46 | 
 47 |       it "parses no hub urls" do
 48 |         expect(@feed.hubs.count).to eq 0
 49 |       end
 50 | 
 51 |       it "parses hub urls" do
 52 |         AtomFeedBurner.preprocess_xml = false
 53 |         feed_with_hub = AtomFeedBurner.parse(load_sample("TypePadNews.xml"))
 54 |         expect(feed_with_hub.hubs.count).to eq 1
 55 |       end
 56 | 
 57 |       it "parses entries" do
 58 |         expect(@feed.entries.size).to eq 5
 59 |       end
 60 | 
 61 |       it "changes url" do
 62 |         new_url = "http://some.url.com"
 63 |         expect { @feed.url = new_url }.not_to raise_error
 64 |         expect(@feed.url).to eq new_url
 65 |       end
 66 | 
 67 |       it "changes feed_url" do
 68 |         new_url = "http://some.url.com"
 69 |         expect { @feed.feed_url = new_url }.not_to raise_error
 70 |         expect(@feed.feed_url).to eq new_url
 71 |       end
 72 |     end
 73 | 
 74 |     describe "parsing alternate style feeds" do
 75 |       before do
 76 |         @feed = AtomFeedBurner.parse(sample_feedburner_atom_feed_alternate)
 77 |       end
 78 | 
 79 |       it "parses the title" do
 80 |         expect(@feed.title).to eq "Giant Robots Smashing Into Other Giant Robots"
 81 |       end
 82 | 
 83 |       it "parses the description" do
 84 |         description = "Written by thoughtbot"
 85 |         expect(@feed.description).to eq description
 86 |       end
 87 | 
 88 |       it "parses the url" do
 89 |         expect(@feed.url).to eq "https://robots.thoughtbot.com"
 90 |       end
 91 | 
 92 |       it "parses the feed_url" do
 93 |         expect(@feed.feed_url).to eq "http://feeds.feedburner.com/GiantRobotsSmashingIntoOtherGiantRobots"
 94 |       end
 95 | 
 96 |       it "parses hub urls" do
 97 |         expect(@feed.hubs.count).to eq 1
 98 |       end
 99 | 
100 |       it "parses entries" do
101 |         expect(@feed.entries.size).to eq 3
102 |       end
103 | 
104 |       it "changes url" do
105 |         new_url = "http://some.url.com"
106 |         expect { @feed.url = new_url }.not_to raise_error
107 |         expect(@feed.url).to eq new_url
108 |       end
109 | 
110 |       it "changes feed_url" do
111 |         new_url = "http://some.url.com"
112 |         expect { @feed.feed_url = new_url }.not_to raise_error
113 |         expect(@feed.feed_url).to eq new_url
114 |       end
115 |     end
116 | 
117 |     describe "preprocessing" do
118 |       it "retains markup in xhtml content" do
119 |         AtomFeedBurner.preprocess_xml = true
120 | 
121 |         feed = AtomFeedBurner.parse sample_feed_burner_atom_xhtml_feed
122 |         entry = feed.entries.first
123 | 
124 |         expect(entry.content).to match(/\A<p/)
125 |       end
126 |     end
127 |   end
128 | end
129 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/json_feed_item_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | describe Feedjira::Parser::JSONFeedItem do
  6 |   def params(**overrides)
  7 |     { "id" => "my_id", "url" => "my_url", **overrides }
  8 |   end
  9 | 
 10 |   before do
 11 |     # I don't really like doing it this way because these unit test should only
 12 |     # rely on JSONFeed, but this is actually how it should work. You would
 13 |     # never just pass entry json straight to the JSONFeedItem
 14 |     @entry = Feedjira::Parser::JSONFeed.parse(sample_json_feed).entries.first
 15 |   end
 16 | 
 17 |   it "parses the id" do
 18 |     expect(@entry.id).to eq "http://inessential.com/2017/06/02/james_dempsey_and_the_breakpoints_benefi"
 19 |   end
 20 | 
 21 |   it "parses the url" do
 22 |     expect(@entry.url).to eq "http://inessential.com/2017/06/02/james_dempsey_and_the_breakpoints_benefi"
 23 |   end
 24 | 
 25 |   it "parses the title" do
 26 |     expect(@entry.title).to eq "James Dempsey and the Breakpoints Benefit App Camp for Girls"
 27 |   end
 28 | 
 29 |   it "parses the content" do
 30 |     content = "<p>On Wednesday night I know where I’ll be — playing keyboard for a few songs at the James Dempsey and the Breakpoints concert benefitting App Camp for Girls.</p>\n\n<p><a href=\"https://www.classy.org/events/-/e126329\">You should get tickets</a>. It’s a fun time for a great cause.</p>\n\n<p>Bonus: James writes about how <a href=\"http://jamesdempsey.net/2017/06/02/wwdc-in-san-jose-full-circle/\">this concert is full circle for him</a>. It’s a special night.</p>"
 31 |     expect(@entry.content).to eq content
 32 |   end
 33 | 
 34 |   it "parses the published date" do
 35 |     published = Feedjira::Util::ParseTime.call "2017-06-02T22:05:47-07:00"
 36 |     expect(@entry.published).to eq published
 37 |   end
 38 | 
 39 |   it "sets the published date to nil when not present" do
 40 |     entry = described_class.new(params)
 41 | 
 42 |     expect(entry.published).to be_nil
 43 |   end
 44 | 
 45 |   it "sets updated to date_modified when present" do
 46 |     updated = "2017-06-02T22:05:47-07:00"
 47 |     entry = described_class.new(params("date_modified" => updated))
 48 | 
 49 |     updated = Feedjira::Util::ParseTime.call "2017-06-02T22:05:47-07:00"
 50 |     expect(entry.updated).to eq updated
 51 |   end
 52 | 
 53 |   it "sets updated to nil when date_modified is not present" do
 54 |     entry = described_class.new(params)
 55 | 
 56 |     expect(entry.updated).to be_nil
 57 |   end
 58 | 
 59 |   it "sets the author when nested author object is present" do
 60 |     entry = described_class.new(params("author" => { "name" => "John Doe" }))
 61 | 
 62 |     expect(entry.author).to eq "John Doe"
 63 |   end
 64 | 
 65 |   it "sets the author to nil when nested author object is not present" do
 66 |     entry = described_class.new(params)
 67 | 
 68 |     expect(entry.author).to be_nil
 69 |   end
 70 | 
 71 |   it "supports each" do
 72 |     expect(@entry).to respond_to :each
 73 |   end
 74 | 
 75 |   it "is able to list out all the fields with each" do
 76 |     all_fields = []
 77 |     title_value = ""
 78 |     @entry.each do |field, value|
 79 |       all_fields << field
 80 |       title_value = value if field == "title"
 81 |     end
 82 | 
 83 |     expect(title_value).to eq "James Dempsey and the Breakpoints Benefit App Camp for Girls"
 84 | 
 85 |     expected_fields = %w[
 86 |       author
 87 |       banner_image
 88 |       categories
 89 |       content
 90 |       entry_id
 91 |       external_url
 92 |       image
 93 |       json
 94 |       published
 95 |       summary
 96 |       title
 97 |       updated
 98 |       url
 99 |     ]
100 |     expect(all_fields).to match_array expected_fields
101 |   end
102 | 
103 |   it "supports checking if a field exists in the entry" do
104 |     expect(@entry).to include "title"
105 |     expect(@entry).to include "url"
106 |   end
107 | 
108 |   it "allows access to fields with hash syntax" do
109 |     expect(@entry["title"]).to eq "James Dempsey and the Breakpoints Benefit App Camp for Girls"
110 |     expect(@entry["url"]).to eq "http://inessential.com/2017/06/02/james_dempsey_and_the_breakpoints_benefi"
111 |   end
112 | 
113 |   it "allows setting field values with hash syntax" do
114 |     @entry["title"] = "Foobar"
115 |     expect(@entry.title).to eq "Foobar"
116 |   end
117 | end
118 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/TechCrunchFirstEntry.xml:
--------------------------------------------------------------------------------
1 | <img width="100" height="62" src="http://tctechcrunch2011.files.wordpress.com/2011/11/angies-list.jpeg?w=100&amp;h=62&amp;crop=1" class="attachment-tc-carousel-river-thumb wp-post-image" alt="angies-list" title="angies-list" style="float: left; margin: 0 10px 7px 0;" /><p>Angie&#8217;s List, which offers consumers a way to review and rate doctors, contractors and service companies on the Web, has just set the terms for its IPO. In a <a href="http://www.sec.gov/Archives/edgar/data/1491778/000119312511292292/d222159ds1a.htm">new filing</a>, the company revealed that it aims to raise as much as $131.4 million in the offering and has priced its IPO in the range of $11 to $13 per share. The company will <a href="http://techcrunch.com/2011/10/31/pre-ipo-angies-list-is-the-latest-tech-company-to-list-on-the-nasdaq/">list on the Nasdaq</a> under the symbol “ANGI.” At the high end of the range, Angie&#8217;s List would be valued at nearly $700 million. </p>
2 | <p>Angie’s List launched in 1995 with a focus on local home, yard and car services, sits at the intersection of local search, user-generated content and subscription-based services. To date, Angie’s List has raised nearly $100 million from Battery Ventures, T. Rowe Price, City Investment Group, Cardinal Ventures and others.</p>
3 | <p>As of September 30, 2011, the company offered its service to paying members in 175 local markets in the United States (compared to 170 as of August). Angie’s List now has more than 1 million (up from 820,000) paid memberships.</p>
4 | <p>Angie&#8217;s List incurred marketing expenses of $30.2 million and $48 million in 2010 and the nine months ended September 30, 2011, respectively. In 2010 and the nine months ended September 30, 2011, the company&#8217;s revenue was $59.0 million and $62.6 million, respectively. In the same periods, Angie&#8217;s net loss was $27.2 million and $43.2 million. Angie&#8217;s List has incurred net losses its start and had an accumulated deficit of $160.6 million as of September 30, 2011.</p>
5 |   <br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/tctechcrunch2011.wordpress.com/446154/"></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/tctechcrunch2011.wordpress.com/446154/"></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/tctechcrunch2011.wordpress.com/446154/"></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/tctechcrunch2011.wordpress.com/446154/"></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/tctechcrunch2011.wordpress.com/446154/"></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/tctechcrunch2011.wordpress.com/446154/"></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/tctechcrunch2011.wordpress.com/446154/"></a> 
6 | <p><a href="http://feedads.g.doubleclick.net/~at/qv0C7ycnV3JrFMT7HnFWNXc9uEY/0/da"><img src="http://feedads.g.doubleclick.net/~at/qv0C7ycnV3JrFMT7HnFWNXc9uEY/0/di" border="0" ismap="true"></img></a><br/>
7 | <a href="http://feedads.g.doubleclick.net/~at/qv0C7ycnV3JrFMT7HnFWNXc9uEY/1/da"><img src="http://feedads.g.doubleclick.net/~at/qv0C7ycnV3JrFMT7HnFWNXc9uEY/1/di" border="0" ismap="true"></img></a></p><div class="feedflare">
8 | <a href="http://feeds.feedburner.com/~ff/Techcrunch?a=kTeeGj4FnwU:ARydNZJ6SxI:2mJPEYqXBVI"><img src="http://feeds.feedburner.com/~ff/Techcrunch?d=2mJPEYqXBVI" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/Techcrunch?a=kTeeGj4FnwU:ARydNZJ6SxI:7Q72WNTAKBA"><img src="http://feeds.feedburner.com/~ff/Techcrunch?d=7Q72WNTAKBA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/Techcrunch?a=kTeeGj4FnwU:ARydNZJ6SxI:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/Techcrunch?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/Techcrunch?a=kTeeGj4FnwU:ARydNZJ6SxI:-BTjWOF_DHI"><img src="http://feeds.feedburner.com/~ff/Techcrunch?i=kTeeGj4FnwU:ARydNZJ6SxI:-BTjWOF_DHI" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/Techcrunch?a=kTeeGj4FnwU:ARydNZJ6SxI:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/Techcrunch?i=kTeeGj4FnwU:ARydNZJ6SxI:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/Techcrunch?a=kTeeGj4FnwU:ARydNZJ6SxI:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/Techcrunch?d=qj6IDK7rITs" border="0"></img></a>
9 | </div><img src="http://feeds.feedburner.com/~r/Techcrunch/~4/kTeeGj4FnwU" height="1" width="1"/>


--------------------------------------------------------------------------------
/spec/sample_feeds/FeedjiraBlog.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <feed xmlns="http://www.w3.org/2005/Atom">
 3 |   <title>Feedjira Blog</title>
 4 |   <subtitle>A Blog for Feedjira</subtitle>
 5 |   <id>http://feedjira.com/blog</id>
 6 |   <link href="http://feedjira.com/blog"/>
 7 |   <link href="http://feedjira.com/blog/feed.xml" rel="self"/>
 8 |   <updated>2014-03-17T00:00:00Z</updated>
 9 |   <author>
10 |     <name>Jon Allured</name>
11 |   </author>
12 |   <entry>
13 |     <title>Feedjira Goes One-Point-Oh</title>
14 |     <link rel="alternate" href="http://feedjira.com/blog/2014/03/17/feedjira-goes-one-point-oh.html"/>
15 |     <id>http://feedjira.com/blog/2014/03/17/feedjira-goes-one-point-oh.html</id>
16 |     <published>2014-03-17T00:00:00Z</published>
17 |     <updated>2014-03-17T08:02:45-05:00</updated>
18 |     <author>
19 |       <name>Jon Allured</name>
20 |     </author>
21 |     <content type="html">&lt;p&gt;Last fall, I asked &lt;a href="http://www.pauldix.net"&gt;Paul Dix&lt;/a&gt; if I could take over maintenance of his gem
22 | Feedzirra. My request was totally out of the blue, so I was pretty pumped when
23 | he got right back to me and said yes. He said that he didn&amp;rsquo;t have time to work
24 | on it anymore and so I should feel free to do whatever I thought was best.&lt;/p&gt;
25 | 
26 | &lt;p&gt;Score!&lt;/p&gt;
27 | 
28 | &lt;p&gt;My first order of business was to go through the many open issues and pull
29 | requests on GitHub. When I started there were over 60, a number that I&amp;rsquo;ve gotten
30 | down to just a few. I thought it was important to ensure that users saw me treat
31 | their issue as important and even if it was very old (which many were), I asked
32 | if there was anything I could do to help.&lt;/p&gt;
33 | 
34 | &lt;p&gt;I was pleasantly surprised by the nice way many people responded and we got to
35 | work addressing their questions and issues.&lt;/p&gt;
36 | 
37 | &lt;p&gt;As I was working through issues and pull requests, I kept &lt;a href="http://semver.org"&gt;SemVer&lt;/a&gt; in mind -
38 | bug fixes in patch releases and backward-compatible changes in minor releases.
39 | But I also realized that it was past time for this project to be at version 1.0.
40 | In the SemVer FAQ, they talk about when to release version 1.0 and Feedzirra fit
41 | the bill: it was being used in production, there was a stable API and I was
42 | taking backwards compatibilty seriously.&lt;/p&gt;
43 | 
44 | &lt;p&gt;So I treated it as a project at 1.0 and I did my best to release versions that
45 | were backward compatible and added deprecations for what I wanted to do in 1.0.
46 | I saw things that I wanted to completely rewrite, but I resisted the urge to
47 | burn it all down and start again.&lt;/p&gt;
48 | 
49 | &lt;p&gt;When I was close to being caught up on the backlog of issues and pull requests,
50 | I started thinking about releasing version 1.0, and I knew I wanted to create a
51 | website for the project. I worked with &lt;a href="http://danielariza.com"&gt;Daniel Ariza&lt;/a&gt; to make it happen. I
52 | ripped apart the README and rewrote just about all the sections.&lt;/p&gt;
53 | 
54 | &lt;p&gt;There was an open issue on the project about renaming the Gem and I knew that
55 | launching the website and releasing 1.0 would be the perfect opportunity, so I
56 | went for it. There was a suggestion to change the name to Feedzilla, but since
57 | that is already a thing, I went with Feedjira. I bought the domain and setup an
58 | organization by that name on GitHub.&lt;/p&gt;
59 | 
60 | &lt;p&gt;With those things in place, I needed to actually update the code for these
61 | changes. I wanted to make this transition as easy as possible and devised a
62 | simple way to use &lt;a href="/versions.html"&gt;three versions&lt;/a&gt; to make the jump to 1.0.&lt;/p&gt;
63 | 
64 | &lt;p&gt;For most users, upgrading to 1.0 should be a breeze, but I have an &lt;a href="/upgrading.html"&gt;upgrade
65 | page&lt;/a&gt; to help with a couple details. If you have any trouble upgrading,
66 | please let me know by &lt;a href="https://github.com/feedjira/feedjira/issues"&gt;opening an issue&lt;/a&gt;.&lt;/p&gt;
67 | 
68 | &lt;p&gt;There are still lots of things I&amp;rsquo;d like to do with this Gem. I mentioned seeing
69 | things that I wanted to completely rewrite, so that&amp;rsquo;ll be something that I work
70 | on for a 2.0 release, but that&amp;rsquo;s a ways off. I&amp;rsquo;d like to officially support
71 | JRuby. Many people use Feedjira with Rails, so a separate project that helps
72 | those users get up and running quickly seems to have value.&lt;/p&gt;
73 | 
74 | &lt;p&gt;The list goes on.&lt;/p&gt;
75 | 
76 | &lt;p&gt;I do have a request before I finish this thing: I&amp;rsquo;d like to hear from users that
77 | have apps in production using Feedjira. If you&amp;rsquo;re using Feedjira for a
78 | commercial app, please &lt;a href="feedjira@gmail.com"&gt;email me&lt;/a&gt;!&lt;/p&gt;
79 | 
80 | &lt;p&gt;Thanks to everyone who has helped me accomplish this, but especially &lt;a href="http://www.pauldix.net"&gt;Paul
81 | Dix&lt;/a&gt; for creating such a fun project to work on, &lt;a href="http://danielariza.com"&gt;Daniel Ariza&lt;/a&gt; for a
82 | badass website design and the many people who opened issues or sent pull
83 | requests. Open source is fun to work on because of people like you!! &amp;lt;3 &amp;lt;3 &amp;lt;3&lt;/p&gt;
84 | </content>
85 |   </entry>
86 | </feed>
87 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/itunes.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | <?xml version="1.0" encoding="UTF-8"?>
 3 | <rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">
 4 |  
 5 | <channel>
 6 | <title>All About Everything</title>
 7 | <link>http://www.example.com/podcasts/everything/index.html</link>
 8 | <language>en-us</language>
 9 | <copyright>&#x2117; &amp; &#xA9; 2005 John Doe &amp; Family</copyright>
10 | <lastBuildDate>Sat, 07 Sep 2002 09:42:31 GMT</lastBuildDate>
11 | <ttl>60</ttl>
12 | <itunes:type>episodic</itunes:type>
13 | <itunes:subtitle>A show about everything</itunes:subtitle>
14 | <itunes:new-feed-url>http://example.com/new.xml</itunes:new-feed-url>
15 | <itunes:author>John Doe</itunes:author>
16 | <itunes:summary>All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store</itunes:summary>
17 | <description>All About Everything is a show about everything.</description>
18 | <description>All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store</description>
19 | <itunes:complete>yes</itunes:complete>
20 | <itunes:owner>
21 | <itunes:name>John Doe</itunes:name>
22 | <itunes:email>john.doe@example.com</itunes:email>
23 | </itunes:owner>
24 | <itunes:image href="http://example.com/podcasts/everything/AllAboutEverything.jpg" />
25 | <image>
26 |   <url>http://example.com/podcasts/everything/AllAboutEverything.jpg</url>
27 |   <title>All About Everything</title>
28 |   <link>http://www.example.com/podcasts/everything/index.html</link>
29 |   <width>88</width>
30 |   <height>31</height>
31 |   <description>All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store</description>
32 | </image>
33 | <itunes:category text="Technology">
34 |   <itunes:category text="Gadgets"/>
35 | </itunes:category>
36 | <itunes:category text="TV &amp; Film"/>
37 | <itunes:category text="Arts">
38 |   <itunes:category text="Design"/>
39 |   <itunes:category text="Food"/>
40 | </itunes:category>
41 | <item>
42 | <title>Shake Shake Shake Your Spices</title>
43 | <itunes:title>Shake Shake Shake Your Spices</itunes:title>
44 | <itunes:episodeType>full</itunes:episodeType>
45 | <itunes:season>1</itunes:season>
46 | <itunes:episode>3</itunes:episode>
47 | <itunes:author>John Doe</itunes:author>
48 | <itunes:subtitle>A short primer on table spices</itunes:subtitle>
49 | <itunes:summary>This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!</itunes:summary>
50 | <enclosure url="http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a" length="8727310" type="audio/x-m4a" />
51 | <guid>http://example.com/podcasts/archive/aae20050615.m4a</guid>
52 | <pubDate>Wed, 15 Jun 2005 19:00:00 GMT</pubDate>
53 | <itunes:duration>7:04</itunes:duration>
54 | <itunes:keywords>salt, pepper, shaker, exciting</itunes:keywords>
55 | <itunes:image href="http://example.com/podcasts/everything/AllAboutEverything.jpg" />
56 | <itunes:order>12</itunes:order>
57 | <itunes:isClosedCaptioned>yes</itunes:isClosedCaptioned>
58 | <content:encoded>&lt;p&gt;&lt;strong&gt;TOPIC&lt;/strong&gt;: Gooseneck Options&lt;/p&gt;</content:encoded>
59 | </item>
60 |  
61 | <item>
62 | <title>Socket Wrench Shootout</title>
63 | <itunes:title>Socket Wrench Shootout</itunes:title>
64 | <itunes:episodeType>full</itunes:episodeType>
65 | <itunes:season>1</itunes:season>
66 | <itunes:episode>2</itunes:episode>
67 | <itunes:author>Jane Doe</itunes:author>
68 | <itunes:subtitle>Comparing socket wrenches is fun!</itunes:subtitle>
69 | <itunes:summary>This week we talk about metric vs. old english socket wrenches. Which one is better? Do you really need both? Get all of your answers here.</itunes:summary>
70 | <enclosure url="http://example.com/podcasts/everything/AllAboutEverythingEpisode2.mp3" length="5650889" type="audio/mpeg" />
71 | <guid>http://example.com/podcasts/archive/aae20050608.mp3</guid>
72 | <pubDate>Wed, 8 Jun 2005 19:00:00 GMT</pubDate>
73 | <itunes:duration>4:34</itunes:duration>
74 | <itunes:keywords>metric, socket, wrenches, tool</itunes:keywords>
75 | <itunes:image href="http://example.com/podcasts/everything/AllAboutEverything.jpg" />
76 | </item>
77 |  
78 | <item>
79 | <title>Red, Whine, &amp; Blue</title>
80 | <itunes:title>Red, Whine, &amp; Blue</itunes:title>
81 | <itunes:episodeType>full</itunes:episodeType>
82 | <itunes:season>1</itunes:season>
83 | <itunes:episode>1</itunes:episode>
84 | <itunes:author>Various</itunes:author>
85 | <itunes:subtitle>Red + Blue != Purple</itunes:subtitle>
86 | <itunes:summary>This week we talk about surviving in a Red state if you are a Blue person. Or vice versa.</itunes:summary>
87 | <enclosure url="http://example.com/podcasts/everything/AllAboutEverythingEpisode1.mp3" length="4989537" type="audio/mpeg" />
88 | <guid>http://example.com/podcasts/archive/aae20050601.mp3</guid>
89 | <pubDate>Wed, 1 Jun 2005 19:00:00 GMT</pubDate>
90 | <itunes:duration>3:59</itunes:duration>
91 | <itunes:keywords>politics, red, blue, state</itunes:keywords>
92 | <itunes:image href="http://example.com/podcasts/everything/AllAboutEverything.jpg" />
93 | </item>
94 |  
95 | </channel>
96 | </rss>
97 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml:
--------------------------------------------------------------------------------
 1 | <p>Last week I released the first version of a <a href="http://www.pauldix.net/2009/01/sax-machine-sax-parsing-made-easy.html">SAX based XML parsing library called SAX-Machine</a>. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how to make a ruby library that is already using c underneath perform better. Since I've never written a Ruby C extension and it's been a few years since I've touched C, I decided it would be a good educational experience to give it a try.</p>
 2 | 
 3 | <p>First, let's look into how Nokogiri and SAX-Machine perform a parse. The syntax for SAX-Machine builds up a set of class variables (actually, instance variables on a class object) that describe what you're interested in parsing. So when you see something like this:
 4 | </p><script src="http://gist.github.com/50549.js"></script><p>
 5 | It calls the 'element' and 'elements' methods inserted by the SAXMachine module that build up ruby objects that describe what XML tags we're interested in for the Entry class. That's all pretty straight forward and not really the source of any slowdown in the parsing process. These calls only happen once, when you first load the class.
 6 | 
 7 | </p><p>Things get interesting when you run a parse. So you run Entry.parse(some_xml). That makes the call to Nokogiri, which in turn makes a call to libxml. Libxml then parses over the stream (or string) and makes calls to C methods (in Nokogiri) on certain events. For our purposes, the most interesting are start_element, end_element, and characters_func. The C code in Nokogiri for these is basic. It simply converts those C variables into Ruby ones and then makes calls to whatever instance of Nokogiri::XML:SAX::Document (a Ruby object) is associated with this parse. This is where SAXMachine comes back in. It has handlers for these events that match up the tags with the previously defined SAXMachine objects attached to the Entry class. It ignores the events that don't match a tag (however, it still needs to determine if the tag should be ignored).</p>
 8 | 
 9 | <p>The only possible place I saw to speed things up was to push more of SAX event handling down into the C code. Unfortunately, the only way to do this was to abandon Nokogiri and write my own code to interface with libxml. I used the xml_sax_parser.c from Nokogiri as a base and added to it. I changed it so the SAXMachine definitions of what was interesting would be stored in C. I then changed the SAX handling code to capture the events in C and determine if a tag was of interest there before sending it off to the Ruby objects. The end result is that calls are only made to Ruby when there is an actual event of interest. Thus, I avoid doing any comparisons in Ruby and those classes are simply wrappers that call out to the correct value setters.</p>
10 | 
11 | <p>Here are the results of a quick speed comparison against the Nokogiri SAXMachine, parsing my atom feed using <a href="http://gist.github.com/47938">code from my last post</a>.</p>
12 | <pre>        user   system  total   real<br>sax c    0.060000 0.000000 0.060000 ( 0.069990)<br>sax nokogiri 0.500000 0.010000 0.510000 ( 0.520278)<br></pre><p>
13 | The SAX C is 7.4 times faster than SAX Nokogiri. Now, that doesn't seem like a whole lot, but I think it's quite good considering it was against a library that was already half in C. It's even more punctuated when you look at the comparison of these two against rfeedparser.
14 | </p><pre>        user   system  total    real<br>sax c     0.060000 0.000000 0.060000  ( 0.069990)<br>sax nokogiri 0.500000 0.010000 0.510000  ( 0.520278)<br>rfeedparser 13.770000 1.730000 15.500000 ( 15.690309)<br></pre>
15 | <p>The SAX C version is 224 times faster than rfeedparser! The 7 times multiple from the Nokogiri version of SAXMachine really makes a difference. Unfortunately, I really only wrote this code as a test. It's not even close to something I would use for real. It has memory leaks, isn't thread safe, is completely unreadable, and has hidden bugs that I know about. You can take a look at it in all its misery on the <a href="http://github.com/pauldix/sax-machine/tree/c-refactor">c-rafactor branch of SAXMachine on github</a>. Even though the code is awful, I think it's interesting that there can be this much variability in performance on Ruby libraries that are using C.</p>
16 | 
17 | <p>I could actually turn this into a legitimate working version, but it would take more work than I think it's worth at this point. Also, I'm not excited about the idea of dealing with C issues in SAXMachine. I would be more excited for it if I could get this type of SAX parsing thing into Nokogiri (in addition to the one that is there now). For now, I'll move on to using the Nokogiri version of SAXMachine to create a feed parsing library.</p><div class="feedflare">
18 | <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=9Q8qfQ.P"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=9Q8qfQ.P" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=rLK96Z.p"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=rLK96Z.p" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=B95sFg.p"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=B95sFg.p" border="0"></img></a>
19 | </div><img src="http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~4/519925023" height="1" width="1"/>


--------------------------------------------------------------------------------
/spec/feedjira/feed_utilities_entry_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | describe Feedjira::FeedUtilities do
  6 |   before do
  7 |     @klass = Class.new do
  8 |       include Feedjira::FeedEntryUtilities
  9 |     end
 10 |   end
 11 | 
 12 |   describe "handling dates" do
 13 |     it "parses an ISO 8601 formatted datetime into Time" do
 14 |       time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
 15 |       expect(time.class).to eq Time
 16 |       expect(time).to eq Feedjira::Util::ParseTime.call("Wed Feb 20 18:05:00 UTC 2008")
 17 |     end
 18 | 
 19 |     it "parses a ISO 8601 with milliseconds into Time" do
 20 |       time = @klass.new.parse_datetime("2013-09-17T08:20:13.931-04:00")
 21 |       expect(time.class).to eq Time
 22 |       expect(time).to eq Time.strptime("Tue Sep 17 12:20:13.931 UTC 2013", "%a %b %d %H:%M:%S.%N %Z %Y")
 23 |     end
 24 |   end
 25 | 
 26 |   describe "updated= method" do
 27 |     it "sets updated when no existing updated value and parsed date is valid" do
 28 |       instance = @klass.new
 29 |       instance.updated = "2023-01-01T10:00:00Z"
 30 |       expect(instance["updated"]).to eq Time.parse("2023-01-01T10:00:00Z").utc
 31 |     end
 32 | 
 33 |     it "updates to newer date when existing updated value is older" do
 34 |       instance = @klass.new
 35 |       instance.updated = "2023-01-01T10:00:00Z"
 36 |       instance.updated = "2023-01-02T10:00:00Z"
 37 |       expect(instance["updated"]).to eq Time.parse("2023-01-02T10:00:00Z").utc
 38 |     end
 39 | 
 40 |     it "keeps existing updated value when new date is older" do
 41 |       instance = @klass.new
 42 |       instance.updated = "2023-01-02T10:00:00Z"
 43 |       instance.updated = "2023-01-01T10:00:00Z"
 44 |       expect(instance["updated"]).to eq Time.parse("2023-01-02T10:00:00Z").utc
 45 |     end
 46 | 
 47 |     it "does not set updated when date parsing fails" do
 48 |       instance = @klass.new
 49 |       instance.updated = "invalid-date"
 50 |       expect(instance["updated"]).to be_nil
 51 |     end
 52 | 
 53 |     it "does not change existing updated when new date is invalid" do
 54 |       instance = @klass.new
 55 |       instance.updated = "2023-01-01T10:00:00Z"
 56 |       original_updated = instance["updated"]
 57 |       instance.updated = "invalid-date"
 58 |       expect(instance["updated"]).to eq original_updated
 59 |     end
 60 |   end
 61 | 
 62 |   describe "published= method" do
 63 |     it "sets published when no existing published value and parsed date is valid" do
 64 |       instance = @klass.new
 65 |       instance.published = "2023-01-01T10:00:00Z"
 66 |       expect(instance["published"]).to eq Time.parse("2023-01-01T10:00:00Z").utc
 67 |     end
 68 | 
 69 |     it "updates to older date when existing published value is newer" do
 70 |       instance = @klass.new
 71 |       instance.published = "2023-01-02T10:00:00Z"
 72 |       instance.published = "2023-01-01T10:00:00Z"
 73 |       expect(instance["published"]).to eq Time.parse("2023-01-01T10:00:00Z").utc
 74 |     end
 75 | 
 76 |     it "keeps existing published value when new date is newer" do
 77 |       instance = @klass.new
 78 |       instance.published = "2023-01-01T10:00:00Z"
 79 |       instance.published = "2023-01-02T10:00:00Z"
 80 |       expect(instance["published"]).to eq Time.parse("2023-01-01T10:00:00Z").utc
 81 |     end
 82 | 
 83 |     it "does not set published when date parsing fails" do
 84 |       instance = @klass.new
 85 |       instance.published = "invalid-date"
 86 |       expect(instance["published"]).to be_nil
 87 |     end
 88 | 
 89 |     it "does not change existing published when new date is invalid" do
 90 |       instance = @klass.new
 91 |       instance.published = "2023-01-01T10:00:00Z"
 92 |       original_published = instance["published"]
 93 |       instance.published = "invalid-date"
 94 |       expect(instance["published"]).to eq original_published
 95 |     end
 96 |   end
 97 | 
 98 |   describe "sanitizing" do
 99 |     before do
100 |       @feed = Feedjira.parse(sample_atom_feed)
101 |       @entry = @feed.entries.first
102 |     end
103 | 
104 |     it "doesn't fail when no elements are defined on includer" do
105 |       expect { @klass.new.sanitize! }.not_to raise_error
106 |     end
107 | 
108 |     it "provides a sanitized title" do
109 |       new_title = "<script>this is not safe</script>#{@entry.title}"
110 |       @entry.title = new_title
111 |       scrubbed_title = Loofah.scrub_fragment(new_title, :prune).to_s
112 |       expect(Loofah.scrub_fragment(@entry.title, :prune).to_s).to eq scrubbed_title
113 |     end
114 | 
115 |     it "sanitizes content in place" do
116 |       new_content = "<script>#{@entry.content}"
117 |       @entry.content = new_content.dup
118 | 
119 |       scrubbed_content = Loofah.scrub_fragment(new_content, :prune).to_s
120 | 
121 |       @entry.sanitize!
122 |       expect(@entry.content).to eq scrubbed_content
123 |     end
124 | 
125 |     it "sanitizes things in place" do
126 |       @entry.title   += "<script>"
127 |       @entry.author  += "<script>"
128 |       @entry.content += "<script>"
129 | 
130 |       cleaned_title   = Loofah.scrub_fragment(@entry.title, :prune).to_s
131 |       cleaned_author  = Loofah.scrub_fragment(@entry.author, :prune).to_s
132 |       cleaned_content = Loofah.scrub_fragment(@entry.content, :prune).to_s
133 | 
134 |       @entry.sanitize!
135 |       expect(@entry.title).to   eq cleaned_title
136 |       expect(@entry.author).to  eq cleaned_author
137 |       expect(@entry.content).to eq cleaned_content
138 |     end
139 |   end
140 | end
141 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/rss_entry_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | describe Feedjira::Parser::RSSEntry do
  6 |   before do
  7 |     # I don't really like doing it this way because these unit test should only
  8 |     # rely on RSSEntry, but this is actually how it should work. You would
  9 |     # never just pass entry xml straight to the AtomEnry
 10 |     @entry = Feedjira::Parser::RSS.parse(sample_rss_feed).entries.first
 11 |     tag = "wfw:commentRss"
 12 |     Feedjira::Feed.add_common_feed_entry_element tag, as: :comment_rss
 13 |   end
 14 | 
 15 |   after do
 16 |     # We change the title in one or more specs to test []=
 17 |     if @entry.title != "Nokogiri’s Slop Feature"
 18 |       feed = Feedjira::Parser::RSS.parse sample_rss_feed
 19 |       @entry.title = feed.entries.first.title
 20 |     end
 21 |   end
 22 | 
 23 |   it "parses the title" do
 24 |     expect(@entry.title).to eq "Nokogiri’s Slop Feature"
 25 |   end
 26 | 
 27 |   it "parses the url" do
 28 |     expect(@entry.url).to eq "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
 29 |   end
 30 | 
 31 |   it "parses the author" do
 32 |     expect(@entry.author).to eq "Aaron Patterson"
 33 |   end
 34 | 
 35 |   it "parses the content" do
 36 |     expect(@entry.content).to eq sample_rss_entry_content
 37 |   end
 38 | 
 39 |   it "provides a summary" do
 40 |     summary = "Oops!  When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added.  Why is it called \"slop\"?  It lets you sloppily explore documents.  Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop&#40;&#60;&#60;-eohtml&#41;\n&#60;html&#62;\n&#160; &#60;body&#62;\n&#160; [...]"
 41 |     expect(@entry.summary).to eq summary
 42 |   end
 43 | 
 44 |   it "parses the published date" do
 45 |     published = Feedjira::Util::ParseTime.call "Thu Dec 04 17:17:49 UTC 2008"
 46 |     expect(@entry.published).to eq published
 47 |   end
 48 | 
 49 |   it "parses the categories" do
 50 |     expect(@entry.categories).to eq %w[computadora nokogiri rails]
 51 |   end
 52 | 
 53 |   it "parses the guid as id" do
 54 |     expect(@entry.id).to eq "http://tenderlovemaking.com/?p=198"
 55 |   end
 56 | 
 57 |   it "supports each" do
 58 |     expect(@entry).to respond_to :each
 59 |   end
 60 | 
 61 |   it "is able to list out all fields with each" do
 62 |     all_fields = []
 63 |     title_value = ""
 64 |     @entry.each do |field, value|
 65 |       all_fields << field
 66 |       title_value = value if field == "title"
 67 |     end
 68 | 
 69 |     expect(title_value).to eq "Nokogiri’s Slop Feature"
 70 | 
 71 |     expected_fields = %w[
 72 |       author
 73 |       categories
 74 |       comment_rss
 75 |       comments
 76 |       content
 77 |       entry_id
 78 |       published
 79 |       summary
 80 |       title
 81 |       url
 82 |     ]
 83 |     expect(all_fields.sort).to eq expected_fields
 84 |   end
 85 | 
 86 |   it "supports checking if a field exists in the entry" do
 87 |     expect(@entry).to include "title"
 88 |     expect(@entry).to include "author"
 89 |   end
 90 | 
 91 |   it "allows access to fields with hash syntax" do
 92 |     expect(@entry["title"]).to eq "Nokogiri’s Slop Feature"
 93 |     expect(@entry["author"]).to eq "Aaron Patterson"
 94 |   end
 95 | 
 96 |   it "allows setting field values with hash syntax" do
 97 |     @entry["title"] = "Foobar"
 98 |     expect(@entry.title).to eq "Foobar"
 99 |   end
100 | 
101 |   it "ignores urls from guids with isPermaLink='false'" do
102 |     feed = Feedjira.parse(sample_rss_feed_permalinks)
103 |     expect(feed.entries[0].url).to be_nil
104 |   end
105 | 
106 |   it "gets urls from guids with isPermaLink='true'" do
107 |     feed = Feedjira.parse(sample_rss_feed_permalinks)
108 |     expect(feed.entries[1].url).to eq "http://example.com/2"
109 |   end
110 | 
111 |   it "gets urls from guid where isPermaLink is unspecified" do
112 |     feed = Feedjira.parse(sample_rss_feed_permalinks)
113 |     expect(feed.entries[2].url).to eq "http://example.com/3"
114 |   end
115 | 
116 |   it "prefers urls from <link> when both guid and link are specified" do
117 |     feed = Feedjira.parse(sample_rss_feed_permalinks)
118 |     expect(feed.entries[3].url).to eq "http://example.com/4"
119 |   end
120 | 
121 |   it "exposes comments URL" do
122 |     feed = Feedjira.parse(sample_rss_feed_with_comments)
123 |     expect(feed.entries[0].comments).to eq "https://news.ycombinator.com/item?id=30937433"
124 |   end
125 | 
126 |   it "returns nil when no URL is available from link or guid" do
127 |     xml = <<~XML
128 |       <rss version="2.0">
129 |         <channel>
130 |           <item>
131 |             <title>Entry without URL</title>
132 |             <description>This entry has no link or guid</description>
133 |           </item>
134 |         </channel>
135 |       </rss>
136 |     XML
137 | 
138 |     feed = Feedjira.parse(xml)
139 |     entry = feed.entries.first
140 | 
141 |     expect(entry.url).to be_nil
142 |   end
143 | 
144 |   it "returns nil when guid exists but is not a permalink" do
145 |     xml = <<~XML
146 |       <rss version="2.0">
147 |         <channel>
148 |           <item>
149 |             <title>Entry with non-permalink GUID</title>
150 |             <guid isPermaLink="false">some-guid-123</guid>
151 |           </item>
152 |         </channel>
153 |       </rss>
154 |     XML
155 | 
156 |     feed = Feedjira.parse(xml)
157 |     entry = feed.entries.first
158 | 
159 |     expect(entry.url).to be_nil
160 |   end
161 | end
162 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/TenderLovemakingFirstEntry.xml:
--------------------------------------------------------------------------------
 1 | <p>Oops!  When I released <a href="http://nokogiri.rubyforge.org/" onclick="javascript:urchinTracker ('/outbound/article/nokogiri.rubyforge.org');">nokogiri</a> version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added.  Why is it called "slop"?  It lets you sloppily explore documents.  Basically, it decorates your document with method_missing() that allows you to search your document via method calls.</p>
 2 | <p>Given this document:</p>
 3 | <div class="codesnip-container" >
 4 | <div class="codesnip" style="font-family: monospace;">doc = Nokogiri::Slop<span class="br0">&#40;</span>&lt;&lt;-eohtml<span class="br0">&#41;</span><br />
 5 | &lt;html&gt;<br />
 6 | &nbsp; &lt;body&gt;<br />
 7 | &nbsp; &nbsp; &lt;p&gt;hello&lt;/p&gt;<br />
 8 | &nbsp; &nbsp; &lt;p <span class="kw1">class</span>=<span class="st0">&quot;bold&quot;</span>&gt;bold hello&lt;/p&gt;<br />
 9 | &nbsp; &lt;body&gt;<br />
10 | &lt;/html&gt;<br />
11 | eohtml</div>
12 | </div>
13 | <p>You may look through the tree like so:</p>
14 | <div class="codesnip-container" >
15 | <div class="codesnip" style="font-family: monospace;">doc.<span class="me1">html</span>.<span class="me1">body</span>.<span class="kw3">p</span><span class="br0">&#40;</span>'.<span class="me1">bold</span>'<span class="br0">&#41;</span>.<span class="me1">text</span> <span class="co1"># =&gt; 'bold hello' </span></div>
16 | </div>
17 | <p>The way this works is that method missing is implemented on every node in the document tree.  That method missing method creates an xpath or css query by using the method name and method arguments.  This means that a new search is executed for every method call.  It's fun for playing around, but you definitely won't get the same performance as using one specific CSS search.</p>
18 | <p>My favorite part is that method missing is actually in the <a href="http://github.com/tenderlove/nokogiri/tree/master/lib/nokogiri/decorators/slop.rb" onclick="javascript:urchinTracker ('/outbound/article/github.com');">slop decorator</a>.  When you use the Nokogiri::Slop() method, it adds the decorator to a list that gets mixed in to every node instance at runtime using Module#extend.  That lets me have sweet method missing action, without actually putting method missing in my Node class.</p>
19 | <p>Here is a simplified example:</p>
20 | <div class="codesnip-container" >
21 | <div class="codesnip" style="font-family: monospace;"><span class="kw1">module</span> Decorator<br />
22 | &nbsp; <span class="kw1">def</span> method_a<br />
23 | &nbsp; &nbsp; <span class="st0">&quot;method a&quot;</span><br />
24 | &nbsp; <span class="kw1">end</span></p>
25 | <p>&nbsp; <span class="kw1">def</span> method_b<br />
26 | &nbsp; &nbsp; <span class="st0">&quot;method b: #{super}&quot;</span><br />
27 | &nbsp; <span class="kw1">end</span><br />
28 | <span class="kw1">end</span></p>
29 | <p><span class="kw1">class</span> Foo<br />
30 | &nbsp; <span class="kw1">def</span> method_b<br />
31 | &nbsp; &nbsp; <span class="st0">&quot;inside foo&quot;</span><br />
32 | &nbsp; <span class="kw1">end</span><br />
33 | <span class="kw1">end</span></p>
34 | <p>foo = Foo.<span class="me1">new</span><br />
35 | foo.<span class="me1">extend</span><span class="br0">&#40;</span>Decorator<span class="br0">&#41;</span></p>
36 | <p><span class="kw3">puts</span> foo.<span class="me1">method_a</span> <span class="co1"># =&gt; 'method a'</span><br />
37 | <span class="kw3">puts</span> foo.<span class="me1">method_b</span> <span class="co1"># =&gt; 'method b: inside foo'</span></p>
38 | <p>foo2 = Foo.<span class="me1">new</span><br />
39 | <span class="kw3">puts</span> foo2.<span class="me1">method_b</span> <span class="co1"># =&gt; 'inside foo'</span><br />
40 | <span class="kw3">puts</span> foo2.<span class="me1">method_a</span> <span class="co1"># =&gt; NoMethodError </span></div>
41 | </div>
42 | <p>Module#extend is used to add functionality to the <strong>instance</strong> 'foo', but not 'foo2'.  Both 'foo' and 'foo2' are instances of Foo, but using Module#extend, we can conditionally add functionality <strong>without monkey patching</strong> and keeping a clean separation of concerns.  You can even reach previous functionality by calling super.</p>
43 | <p>But wait!  There's more!  You can stack up these decorators as much as you want.  For example:</p>
44 | <div class="codesnip-container" >
45 | <div class="codesnip" style="font-family: monospace;"><span class="kw1">module</span> AddAString<br />
46 | &nbsp; <span class="kw1">def</span> method<br />
47 | &nbsp; &nbsp; <span class="st0">&quot;Added a string: #{super}&quot;</span><br />
48 | &nbsp; <span class="kw1">end</span><br />
49 | <span class="kw1">end</span></p>
50 | <p><span class="kw1">module</span> UpperCaseResults<br />
51 | &nbsp; <span class="kw1">def</span> method<br />
52 | &nbsp; &nbsp; <span class="kw1">super</span>.<span class="me1">upcase</span><br />
53 | &nbsp; <span class="kw1">end</span><br />
54 | <span class="kw1">end</span></p>
55 | <p><span class="kw1">class</span> Foo<br />
56 | &nbsp; <span class="kw1">def</span> method<br />
57 | &nbsp; &nbsp; <span class="st0">&quot;foo&quot;</span><br />
58 | &nbsp; <span class="kw1">end</span><br />
59 | <span class="kw1">end</span></p>
60 | <p>foo = Foo.<span class="me1">new</span><br />
61 | foo.<span class="me1">extend</span><span class="br0">&#40;</span>AddAString<span class="br0">&#41;</span><br />
62 | foo.<span class="me1">extend</span><span class="br0">&#40;</span>UpperCaseResults<span class="br0">&#41;</span></p>
63 | <p><span class="kw3">puts</span> foo.<span class="me1">method</span> <span class="co1"># =&gt; 'ADDED A STRING: FOO' </span></div>
64 | </div>
65 | <p>Conditional functionality added to methods with no weird "alias method chain" involvement.  Awesome!</p>
66 | <p>I love ruby!</p>
67 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Feedjira
  2 | 
  3 | [![Build Status][actions-badge]][actions] [![Code Climate][code-climate-badge]][code-climate]
  4 | 
  5 | [actions-badge]: https://github.com/feedjira/feedjira/actions/workflows/ruby.yml/badge.svg?branch=main
  6 | [actions]: https://github.com/feedjira/feedjira/actions?query=branch%3Amain
  7 | [code-climate-badge]: https://codeclimate.com/github/feedjira/feedjira/badges/gpa.svg
  8 | [code-climate]: https://codeclimate.com/github/feedjira/feedjira
  9 | [gitter-badge]: https://badges.gitter.im/feedjira/feedjira.svg
 10 | 
 11 | Feedjira is a Ruby library designed to parse feeds.
 12 | 
 13 | ## Installation
 14 | 
 15 | Add this line to your application's Gemfile:
 16 | 
 17 | ```ruby
 18 | gem "feedjira"
 19 | ```
 20 | 
 21 | ## Parsing
 22 | 
 23 | An example of parsing a feed with Feedjira:
 24 | 
 25 | ```ruby
 26 | xml = HTTParty.get(url).body
 27 | feed = Feedjira.parse(xml)
 28 | feed.entries.first.title
 29 | # => "Announcing version 3.0"
 30 | ```
 31 | 
 32 | ## Specifying parser
 33 | 
 34 | If you have the XML and just want to provide a parser class for one parse, you
 35 | can specify that using `parse` with the parser option:
 36 | 
 37 | ```ruby
 38 | Feedjira.parse(xml, parser: MyAwesomeParser)
 39 | ```
 40 | 
 41 | ## Adding attributes to all feeds types / all entries types
 42 | 
 43 | ```ruby
 44 | # Add the generator attribute to all feed types
 45 | Feedjira::Feed.add_common_feed_element("generator")
 46 | xml = HTTParty.get("http://www.pauldix.net/atom.xml").body
 47 | Feedjira.parse(xml).generator
 48 | # => "TypePad"
 49 | ```
 50 | 
 51 | ## Adding attributes to only one class
 52 | 
 53 | If you want to add attributes for only one class you simply have to declare them
 54 | in the class
 55 | 
 56 | ```ruby
 57 | # Add some GeoRss information
 58 | class Feedjira::Parser::RSSEntry
 59 |   element "georss:elevation", as: :elevation
 60 | end
 61 | 
 62 | # Fetch a feed containing GeoRss info and print them
 63 | url = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/significant_week.atom"
 64 | xml = HTTParty.get(url).body
 65 | Feedjira.parse(xml).entries.each do |entry|
 66 |   puts "Elevation: #{entry.elevation}"
 67 | end
 68 | ```
 69 | 
 70 | ## Configuration
 71 | 
 72 | ### Parsers
 73 | 
 74 | #### Adding a custom parser
 75 | 
 76 | You can insert your own parser at the front of the available parser list by:
 77 | 
 78 | ```ruby
 79 | Feedjira.configure do |config|
 80 |   config.parsers.unshift(MyAwesomeParser)
 81 | end
 82 | ```
 83 | 
 84 | Now when you call `Feedjira.parse`, `MyAwesomeParser` will be the first one to
 85 | get a chance to parse the feed.
 86 | 
 87 | #### Explicitly set all available parsers
 88 | 
 89 | Feedjira can be configured to use a specific set of parsers and in a specific order:
 90 | 
 91 | ```ruby
 92 | Feedjira.configure do |config|
 93 |   config.parsers = [
 94 |     Feedjira::Parser::ITunesRSS,
 95 |     MyAwesomeParser,
 96 |     Feedjira::Parser::RSS
 97 |   ]
 98 | end
 99 | ```
100 | 
101 | #### Stripping whitespace from XML
102 | 
103 | Feedjira can be configured to strip all whitespace but defaults to lstrip only:
104 | 
105 | ```ruby
106 | Feedjira.configure do |config|
107 |   config.strip_whitespace = true
108 | end
109 | ```
110 | 
111 | ## Contributing
112 | 
113 | Bug reports and pull requests are welcome on GitHub at
114 | https://github.com/feedjira/feedjira. This project is intended to be a safe,
115 | welcoming space for collaboration, and contributors are expected to adhere to
116 | the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
117 | 
118 | ## Projects that use Feedjira
119 | 
120 | Feedjira is used in some awesome projects around the web - from RSS readers to
121 | add-ons and everything in between. Here are some of them:
122 | 
123 | * [Feedbin][]: Feedbin bills itself as a fast, simple RSS reader that delivers a
124 |   great reading experience. It's a paid RSS reader that integrates with mobile
125 |   apps and it even has a fully featured API!
126 | 
127 | * [Stringer][]: Stringer is a self-hosted, anti-social RSS reader. It's an
128 |   open-source project that's easy to deploy to any host, there's even a
129 |   one-click button to deploy on Heroku.
130 | 
131 | * [BlogFeeder][]: BlogFeeder is a paid Shopify App that makes it easy for you to
132 |   import any external blog into your Shopify store. It helps improve your
133 |   store's SEO and keeps your blogs in sync, plus a lot more.
134 | 
135 | * [Feedbunch][]: Feedbunch is an open source feed reader built to fill the hole
136 |   left by Google Reader. It aims to support all features of Google Reader and
137 |   actually improve on others.
138 | 
139 | * [The Old Reader][old]: The Old Reader advertises as the ultimate social RSS
140 |   reader. It's free to start and also has a paid premium version. There's an API
141 |   and it integrates with many different mobile apps.
142 | 
143 | * [Solve for All][solve]: Solve for All combines search engine and feed parsing
144 |   while protecting your privacy. It's even extendable by the community!
145 |   
146 | * [Feedi API][feedi]: Feedi simplifies how you handle RSS, Atom, or JSON feeds. You can add and keep track of your favourite           feed data with a simple and clean REST API. All entries are enriched by Machine Learning and Semantic engines.
147 | 
148 | * [Breaker][breaker]: The social podcast app
149 | 
150 | * [Huginn][huginn]: Huginn is a system for building agents that perform automated tasks for you online.
151 | 
152 | [Feedbin]: https://feedbin.com/
153 | [Stringer]: https://github.com/swanson/stringer
154 | [BlogFeeder]: https://apps.shopify.com/blogfeeder
155 | [Feedbunch]: https://github.com/amatriain/feedbunch
156 | [old]: http://theoldreader.com/
157 | [solve]: https://solveforall.com/
158 | [feedi]: https://github.com/davidesantangelo/feedi
159 | [breaker]: https://breaker.audio
160 | [huginn]: https://github.com/huginn/huginn
161 | 
162 | Note: to get your project on this list, simply [send an email](mailto:feedjira@gmail.com)
163 | with your project's details.
164 | 


--------------------------------------------------------------------------------
/spec/feedjira/parser/atom_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | module Feedjira
  6 |   module Parser
  7 |     describe "#able_to_parse?" do
  8 |       it "returns false if the xmlns value is not the expected value" do
  9 |         xml = '<feed xmlns="http://www.example.org/"></feed>'
 10 |         expect(Atom).not_to be_able_to_parse(xml)
 11 |       end
 12 | 
 13 |       it "returns false if there is no xmlns value" do
 14 |         xml = "<feed></feed>"
 15 |         expect(Atom).not_to be_able_to_parse(xml)
 16 |       end
 17 | 
 18 |       describe "with a w3.org URL present" do
 19 |         it "returns true if contains the expected w3.org URL as xmlns value" do
 20 |           xml = '<feed xmlns="http://www.w3.org/2005/Atom"></feed>'
 21 |           expect(Atom).to be_able_to_parse(xml)
 22 |         end
 23 | 
 24 |         it "returns true if contains the HTTPS variant of the expected w3.org URL as xmlns value" do
 25 |           xml = '<feed xmlns="https://www.w3.org/2005/Atom"></feed>'
 26 |           expect(Atom).to be_able_to_parse(xml)
 27 |         end
 28 | 
 29 |         it "returns true if contains the expected xmlns value with single quotes" do
 30 |           xml = "<feed xmlns='http://www.w3.org/2005/Atom'></feed>"
 31 |           expect(Atom).to be_able_to_parse(xml)
 32 |         end
 33 | 
 34 |         it "returns true with whitespace around the equals sign" do
 35 |           xml = '<feed xmlns = "http://www.w3.org/2005/Atom"></feed>'
 36 |           expect(Atom).to be_able_to_parse(xml)
 37 |         end
 38 |       end
 39 | 
 40 |       describe "with a purl.org URL present" do
 41 |         it "returns true if contains the expected purl.org URL as xmlns value" do
 42 |           xml = '<feed xmlns="http://purl.org/atom/ns#"></feed>'
 43 |           expect(Atom).to be_able_to_parse(xml)
 44 |         end
 45 | 
 46 |         it "returns true if contains the expected xmlns value with single quotes" do
 47 |           xml = "<feed xmlns='http://purl.org/atom/ns#'></feed>"
 48 |           expect(Atom).to be_able_to_parse(xml)
 49 |         end
 50 | 
 51 |         it "returns true with whitespace around the equals sign" do
 52 |           xml = '<feed xmlns = "http://purl.org/atom/ns#"></feed>'
 53 |           expect(Atom).to be_able_to_parse(xml)
 54 |         end
 55 |       end
 56 | 
 57 |       context "with full sample feeds" do
 58 |         it "returns true for an atom feed" do
 59 |           expect(Atom).to be_able_to_parse(sample_atom_feed)
 60 |         end
 61 | 
 62 |         it "returns false for an rdf feed" do
 63 |           expect(Atom).not_to be_able_to_parse(sample_rdf_feed)
 64 |         end
 65 | 
 66 |         it "returns false for an rss feedburner feed" do
 67 |           expect(Atom).not_to be_able_to_parse(sample_rss_feed_burner_feed)
 68 |         end
 69 | 
 70 |         it "returns true for an atom feed that has line breaks in between attributes in the <feed> node" do
 71 |           expect(Atom).to be_able_to_parse(sample_atom_feed_line_breaks)
 72 |         end
 73 |       end
 74 |     end
 75 | 
 76 |     describe "parsing" do
 77 |       before do
 78 |         @feed = Atom.parse(sample_atom_feed)
 79 |       end
 80 | 
 81 |       it "parses the title" do
 82 |         expect(@feed.title).to eq "Amazon Web Services Blog"
 83 |       end
 84 | 
 85 |       it "parses the description" do
 86 |         description = "Amazon Web Services, Products, Tools, and Developer Information..."
 87 |         expect(@feed.description).to eq description
 88 |       end
 89 | 
 90 |       it "parses the icon url" do
 91 |         feed_with_icon = Atom.parse(load_sample("SamRuby.xml"))
 92 |         expect(feed_with_icon.icon).to eq "../favicon.ico"
 93 |       end
 94 | 
 95 |       it "parses the url" do
 96 |         expect(@feed.url).to eq "http://aws.typepad.com/aws/"
 97 |       end
 98 | 
 99 |       it "parses the url even when it doesn't have the type='text/html' attribute" do
100 |         xml = load_sample "atom_with_link_tag_for_url_unmarked.xml"
101 |         feed = Atom.parse xml
102 |         expect(feed.url).to eq "http://www.innoq.com/planet/"
103 |       end
104 | 
105 |       it "parses the feed_url even when it doesn't have the type='application/atom+xml' attribute" do
106 |         feed = Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml"))
107 |         expect(feed.feed_url).to eq "http://www.innoq.com/planet/atom.xml"
108 |       end
109 | 
110 |       it "parses the feed_url" do
111 |         expect(@feed.feed_url).to eq "http://aws.typepad.com/aws/atom.xml"
112 |       end
113 | 
114 |       it "parses no hub urls" do
115 |         expect(@feed.hubs.count).to eq 0
116 |       end
117 | 
118 |       it "parses the hub urls" do
119 |         feed_with_hub = Atom.parse(load_sample("SamRuby.xml"))
120 |         expect(feed_with_hub.hubs.count).to eq 1
121 |         expect(feed_with_hub.hubs.first).to eq "http://pubsubhubbub.appspot.com/"
122 |       end
123 | 
124 |       it "parses entries" do
125 |         expect(@feed.entries.size).to eq 10
126 |       end
127 |     end
128 | 
129 |     describe "preprocessing" do
130 |       it "retains markup in xhtml content" do
131 |         Atom.preprocess_xml = true
132 | 
133 |         feed = Atom.parse sample_atom_xhtml_feed
134 |         entry = feed.entries.first
135 | 
136 |         expect(entry.raw_title).to match(/<i/)
137 |         expect(entry.title).to eq("Sentry Calming Collar for dogs")
138 |         expect(entry.title_type).to eq("xhtml")
139 |         expect(entry.summary).to match(/<b/)
140 |         expect(entry.content).to match(/\A<p/)
141 |       end
142 | 
143 |       it "does not duplicate content when there are divs in content" do
144 |         Atom.preprocess_xml = true
145 | 
146 |         feed = Atom.parse sample_duplicate_content_atom_feed
147 |         content = Nokogiri::HTML(feed.entries[1].content)
148 |         expect(content.css("img").length).to eq 11
149 |       end
150 |     end
151 | 
152 |     describe "parsing url and feed_url" do
153 |       before do
154 |         @feed = Atom.parse(sample_atom_middleman_feed)
155 |       end
156 | 
157 |       it "parses url" do
158 |         expect(@feed.url).to eq "http://feedjira.com/blog"
159 |       end
160 | 
161 |       it "parses feed_url" do
162 |         expect(@feed.feed_url).to eq "http://feedjira.com/blog/feed.xml"
163 |       end
164 | 
165 |       it "does not parse links without the rel='self' attribute as feed_url" do
166 |         xml = load_sample "atom_simple_single_entry.xml"
167 |         feed = Atom.parse xml
168 |         expect(feed.feed_url).to be_nil
169 |       end
170 | 
171 |       it "does not parse links with the rel='self' attribute as url" do
172 |         xml = load_sample "atom_simple_single_entry_link_self.xml"
173 |         feed = Atom.parse xml
174 |         expect(feed.url).to be_nil
175 |       end
176 |     end
177 |   end
178 | end
179 | 


--------------------------------------------------------------------------------
/.github/copilot-instructions.md:
--------------------------------------------------------------------------------
  1 | # Feedjira Ruby Library
  2 | 
  3 | Feedjira is a Ruby library designed to parse feeds (RSS, Atom, JSON feeds). It provides a unified interface for parsing different feed formats and extracting structured data from them.
  4 | 
  5 | Always reference these instructions first and fallback to search or bash commands only when you encounter unexpected information that does not match the info here.
  6 | 
  7 | ## Working Effectively
  8 | 
  9 | Bootstrap, build, and test the repository:
 10 | 
 11 | - `gem install bundler --user-install` -- installs bundler for user-specific installation
 12 | - `export PATH="$HOME/.local/share/gem/ruby/3.2.0/bin:$PATH"` -- add bundler to PATH
 13 | - `bundle config set --local path 'vendor/bundle'` -- configure local bundle installation
 14 | - `bundle install` -- takes 30-60 seconds to complete with dependencies installation. NEVER CANCEL. Set timeout to 120+ seconds.
 15 | - `bundle exec rake` -- runs both tests and rubocop, takes ~3 seconds. NEVER CANCEL. Set timeout to 60+ seconds.
 16 | 
 17 | Run tests specifically:
 18 | - `bundle exec rake spec` -- runs RSpec test suite, takes ~2 seconds. NEVER CANCEL. Set timeout to 60+ seconds.
 19 | - `bundle exec rake rubocop` -- runs code style checks, takes ~3 seconds. NEVER CANCEL. Set timeout to 60+ seconds.
 20 | 
 21 | Generate documentation:
 22 | - `bundle exec yard doc` -- generates API documentation, takes ~2 seconds. NEVER CANCEL. Set timeout to 60+ seconds.
 23 | 
 24 | ## Validation
 25 | 
 26 | Always manually validate any new code by running through complete scenarios after making changes:
 27 | - ALWAYS run the full test suite with `bundle exec rake` before submitting changes.
 28 | - ALWAYS test feed parsing functionality by creating a Ruby script that parses RSS, Atom, and JSON feeds.
 29 | - You can build and test the library successfully - it has excellent test coverage (98%+).
 30 | - Always run `bundle exec rake` (which includes `bundle exec rubocop`) before you are done or the CI (.github/workflows/ruby.yml) will fail.
 31 | 
 32 | ## Working with Feed Parsing
 33 | 
 34 | Test feed parsing functionality:
 35 | ```ruby
 36 | require './lib/feedjira'
 37 | require './spec/sample_feeds'
 38 | include SampleFeeds
 39 | 
 40 | # Parse RSS feed
 41 | rss_xml = sample_rss_feed
 42 | rss_feed = Feedjira.parse(rss_xml)
 43 | puts "RSS Title: #{rss_feed.title}"
 44 | puts "RSS Entries: #{rss_feed.entries.size}"
 45 | 
 46 | # Parse Atom feed  
 47 | atom_xml = sample_atom_feed
 48 | atom_feed = Feedjira.parse(atom_xml)
 49 | puts "Atom Title: #{atom_feed.title}"
 50 | 
 51 | # Parse JSON feed
 52 | json_content = sample_json_feed
 53 | json_feed = Feedjira.parse(json_content)
 54 | puts "JSON Title: #{json_feed.title}"
 55 | ```
 56 | 
 57 | Launch interactive console for testing:
 58 | - `bundle exec irb -r ./lib/feedjira` -- starts IRB with Feedjira loaded
 59 | - `bundle exec pry -r ./lib/feedjira` -- starts Pry console with Feedjira loaded
 60 | 
 61 | ## Project Structure
 62 | 
 63 | ### Repository Root
 64 | ```
 65 | README.md          # Project overview and usage examples
 66 | Gemfile           # Ruby dependencies
 67 | Rakefile          # Build tasks (spec, rubocop, yard)
 68 | feedjira.gemspec  # Gem specification
 69 | .rubocop.yml      # Code style configuration
 70 | .rspec            # RSpec configuration
 71 | ```
 72 | 
 73 | ### Source Code
 74 | - `lib/feedjira.rb` -- Main library file and module definition
 75 | - `lib/feedjira/` -- Core library modules and utilities
 76 | - `lib/feedjira/parser/` -- Feed parser implementations (RSS, Atom, JSON, etc.)
 77 | - `lib/feedjira/core_ext/` -- Ruby core extensions (String, Time, Date)
 78 | 
 79 | ### Tests
 80 | - `spec/feedjira/` -- Main test files organized by module
 81 | - `spec/feedjira/parser/` -- Parser-specific tests
 82 | - `spec/sample_feeds/` -- XML and JSON sample feeds for testing
 83 | - `spec/spec_helper.rb` -- Test configuration and setup
 84 | 
 85 | ### Key Classes and Modules
 86 | - `Feedjira` -- Main module with `.parse()` and `.parser_for_xml()` methods
 87 | - `Feedjira::Parser::RSS` -- RSS feed parser
 88 | - `Feedjira::Parser::Atom` -- Atom feed parser  
 89 | - `Feedjira::Parser::JSONFeed` -- JSON feed parser
 90 | - `Feedjira::Configuration` -- Global configuration options
 91 | 
 92 | ## Common Tasks
 93 | 
 94 | The following are outputs from frequently run commands. Reference them instead of viewing, searching, or running bash commands to save time.
 95 | 
 96 | ### Available Parsers
 97 | When you require the library, these parsers are available in order:
 98 | 1. `Feedjira::Parser::ITunesRSS`
 99 | 2. `Feedjira::Parser::RSSFeedBurner`
100 | 3. `Feedjira::Parser::GoogleDocsAtom`
101 | 4. `Feedjira::Parser::AtomYoutube`
102 | 5. `Feedjira::Parser::AtomFeedBurner`
103 | 6. `Feedjira::Parser::AtomGoogleAlerts`
104 | 7. `Feedjira::Parser::Atom`
105 | 8. `Feedjira::Parser::RSS`
106 | 9. `Feedjira::Parser::JSONFeed`
107 | 
108 | ### Current Version
109 | Feedjira version: 3.2.6
110 | 
111 | ### Dependencies
112 | The project requires Ruby >= 3.1 and depends on:
113 | - `sax-machine` for XML parsing
114 | - `loofah` for HTML sanitization
115 | - `logger` for logging
116 | 
117 | ### Development Dependencies
118 | - `rspec` for testing
119 | - `rubocop` for code style
120 | - `yard` for documentation
121 | - `pry` for debugging
122 | - `faraday` for HTTP requests in tests
123 | - `ox` and `oga` for alternative XML parsing
124 | 
125 | ## Validation Scenarios
126 | 
127 | ### Basic Feed Parsing Validation
128 | After making changes, always test:
129 | 1. Parse an RSS feed and verify title, URL, and entries are extracted
130 | 2. Parse an Atom feed and verify metadata is correctly parsed
131 | 3. Parse a JSON feed and verify structure is maintained
132 | 4. Test parser selection works automatically for different feed types
133 | 
134 | ### Sample Test Script
135 | Create this validation script in `/tmp/test_feedjira.rb`:
136 | ```ruby
137 | require './lib/feedjira'
138 | require './spec/sample_feeds'
139 | include SampleFeeds
140 | 
141 | # Test all major feed types
142 | feeds = [
143 |   { type: 'RSS', content: sample_rss_feed },
144 |   { type: 'Atom', content: sample_atom_feed },
145 |   { type: 'JSON', content: sample_json_feed }
146 | ]
147 | 
148 | feeds.each do |feed_info|
149 |   feed = Feedjira.parse(feed_info[:content])
150 |   puts "✓ #{feed_info[:type]} feed parsed successfully"
151 |   puts "  Title: #{feed.title}"
152 |   puts "  Entries: #{feed.entries.size}"
153 | end
154 | 
155 | puts "✓ All feed types validated successfully"
156 | ```
157 | 
158 | Run with: `bundle exec ruby /tmp/test_feedjira.rb`
159 | 
160 | ## Troubleshooting
161 | 
162 | ### Common Issues
163 | - If bundler is not found: Install with `gem install bundler --user-install` and update PATH
164 | - If bundle install fails with permissions: Use `bundle config set --local path 'vendor/bundle'`
165 | - If tests fail: Check that all dependencies are installed with `bundle install`
166 | - If rubocop fails: Run `bundle exec rubocop -a` to auto-correct style issues
167 | 
168 | ### Environment Requirements
169 | - Ruby 3.1+ (tested on 3.1, 3.2, 3.3, 3.4)
170 | - Bundler gem manager
171 | - Standard UNIX environment (Linux/macOS)
172 | 
173 | ### CI Information
174 | - GitHub Actions runs tests on multiple Ruby versions
175 | - Tests also run with different XML handlers (nokogiri, ox, oga)
176 | - All builds must pass both RSpec tests and RuboCop style checks
177 | 


--------------------------------------------------------------------------------
/spec/feedjira_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "spec_helper"
  4 | 
  5 | RSpec.describe Feedjira do
  6 |   describe ".parse" do
  7 |     context "when the parser is specified" do
  8 |       it "parses an rss feed" do
  9 |         parser = described_class.parser_for_xml(sample_rss_feed)
 10 |         feed = described_class.parse(sample_rss_feed, parser: parser)
 11 | 
 12 |         expect(feed.title).to eq "Tender Lovemaking"
 13 |         published = Feedjira::Util::ParseTime.call "Thu Dec 04 17:17:49 UTC 2008"
 14 |         expect(feed.entries.first.published).to eq published
 15 |         expect(feed.entries.size).to eq 10
 16 |       end
 17 |     end
 18 | 
 19 |     context "when there's an available parser" do
 20 |       it "parses an rdf feed" do
 21 |         feed = described_class.parse(sample_rdf_feed)
 22 |         expect(feed.title).to eq "HREF Considered Harmful"
 23 |         published = Feedjira::Util::ParseTime.call("Tue Sep 02 19:50:07 UTC 2008")
 24 |         expect(feed.entries.first.published).to eq published
 25 |         expect(feed.entries.size).to eq 10
 26 |       end
 27 | 
 28 |       it "parses an rss feed" do
 29 |         feed = described_class.parse(sample_rss_feed)
 30 |         expect(feed.title).to eq "Tender Lovemaking"
 31 |         published = Feedjira::Util::ParseTime.call "Thu Dec 04 17:17:49 UTC 2008"
 32 |         expect(feed.entries.first.published).to eq published
 33 |         expect(feed.entries.size).to eq 10
 34 |       end
 35 | 
 36 |       it "parses an atom feed" do
 37 |         feed = described_class.parse(sample_atom_feed)
 38 |         expect(feed.title).to eq "Amazon Web Services Blog"
 39 |         published = Feedjira::Util::ParseTime.call "Fri Jan 16 18:21:00 UTC 2009"
 40 |         expect(feed.entries.first.published).to eq published
 41 |         expect(feed.entries.size).to eq 10
 42 |       end
 43 | 
 44 |       it "parses an feedburner atom feed" do
 45 |         feed = described_class.parse(sample_feedburner_atom_feed)
 46 |         expect(feed.title).to eq "Paul Dix Explains Nothing"
 47 |         published = Feedjira::Util::ParseTime.call "Thu Jan 22 15:50:22 UTC 2009"
 48 |         expect(feed.entries.first.published).to eq published
 49 |         expect(feed.entries.size).to eq 5
 50 |       end
 51 | 
 52 |       it "parses an itunes feed" do
 53 |         feed = described_class.parse(sample_itunes_feed)
 54 |         expect(feed.title).to eq "All About Everything"
 55 |         published = Feedjira::Util::ParseTime.call "Wed, 15 Jun 2005 19:00:00 GMT"
 56 |         expect(feed.entries.first.published).to eq published
 57 |         expect(feed.entries.size).to eq 3
 58 |       end
 59 | 
 60 |       it "parses an itunes feedburner feed" do
 61 |         feed = described_class.parse(sample_itunes_feedburner_feed)
 62 |         expect(feed.title).to eq "Welcome to Night Vale"
 63 |         published = Feedjira::Util::ParseTime.call "2023-09-22 16:30:15 UTC"
 64 |         expect(feed.entries.first.published).to eq published
 65 |         expect(feed.entries.size).to eq 3
 66 |         url = "https://www.podtrac.com/pts/redirect.mp3/dovetail.prxu.org/_/126/e3dafc45-a202-42d0-a55b-216e733a2d7a/2023_09_17_BTS_Episode_EXCERPT_v2.mp3"
 67 |         expect(feed.entries.first.enclosure_url).to eq url
 68 |       end
 69 | 
 70 |       it "with nested dc:identifier it does not overwrite entry_id" do
 71 |         feed = described_class.parse(sample_rss_feed_huffpost_ca)
 72 |         expect(feed.title.strip).to eq "HuffPost Canada - Athena2 - All Posts"
 73 |         expect(feed.entries.size).to eq 2
 74 |         expect(feed.entries.first.id).to eq "23246627"
 75 |         expect(feed.entries.last.id.strip).to eq "1"
 76 |       end
 77 | 
 78 |       it "does not fail if multiple published dates exist and some are unparseable" do
 79 |         expect(described_class.logger).to receive(:debug).twice
 80 | 
 81 |         feed = described_class.parse(sample_invalid_date_format_feed)
 82 |         expect(feed.title).to eq "Invalid date format feed"
 83 |         published = Feedjira::Util::ParseTime.call "Mon, 16 Oct 2017 15:10:00 GMT"
 84 |         expect(feed.entries.first.published).to eq published
 85 |         expect(feed.entries.size).to eq 2
 86 |       end
 87 |     end
 88 | 
 89 |     context "when there's no available parser" do
 90 |       it "raises described_class::NoParserAvailable" do
 91 |         expect do
 92 |           described_class.parse("I'm an invalid feed")
 93 |         end.to raise_error(described_class::NoParserAvailable)
 94 |       end
 95 |     end
 96 | 
 97 |     it "parses an feedburner rss feed" do
 98 |       feed = described_class.parse(sample_rss_feed_burner_feed)
 99 |       expect(feed.title).to eq "TechCrunch"
100 |       published = Feedjira::Util::ParseTime.call "Wed Nov 02 17:25:27 UTC 2011"
101 |       expect(feed.entries.first.published).to eq published
102 |       expect(feed.entries.size).to eq 20
103 |     end
104 | 
105 |     it "parses an RSS feed with an a10 namespace" do
106 |       feed = described_class.parse(sample_rss_feed_with_a10_namespace)
107 |       expect(feed.url).to eq "http://www.example.com/"
108 |       expect(feed.entries.first.url).to eq "http://www.example.com/5"
109 |       expect(feed.entries.first.updated).to eq Feedjira::Util::ParseTime.call("2020-05-14T10:00:18Z")
110 |       expect(feed.entries.first.author).to eq "John Doe"
111 |       expect(feed.entries.size).to eq 5
112 |     end
113 |   end
114 | 
115 |   describe ".parser_for_xml" do
116 |     it "with Google Docs atom feed it returns the GoogleDocsAtom parser" do
117 |       xml = sample_google_docs_list_feed
118 |       actual_parser = described_class.parser_for_xml(xml)
119 |       expect(actual_parser).to eq described_class::Parser::GoogleDocsAtom
120 |     end
121 | 
122 |     it "with an atom feed it returns the Atom parser" do
123 |       xml = sample_atom_feed
124 |       actual_parser = described_class.parser_for_xml(xml)
125 |       expect(actual_parser).to eq described_class::Parser::Atom
126 |     end
127 | 
128 |     it "with an atom feedburner feed it returns the AtomFeedBurner parser" do
129 |       xml = sample_feedburner_atom_feed
130 |       actual_parser = described_class.parser_for_xml(xml)
131 |       expect(actual_parser).to eq described_class::Parser::AtomFeedBurner
132 |     end
133 | 
134 |     it "with an rdf feed it returns the RSS parser" do
135 |       xml = sample_rdf_feed
136 |       actual_parser = described_class.parser_for_xml(xml)
137 |       expect(actual_parser).to eq described_class::Parser::RSS
138 |     end
139 | 
140 |     it "with an rss feedburner feed it returns the RSSFeedBurner parser" do
141 |       xml = sample_rss_feed_burner_feed
142 |       actual_parser = described_class.parser_for_xml(xml)
143 |       expect(actual_parser).to eq described_class::Parser::RSSFeedBurner
144 |     end
145 | 
146 |     it "with an rss 2.0 feed it returns the RSS parser" do
147 |       xml = sample_rss_feed
148 |       actual_parser = described_class.parser_for_xml(xml)
149 |       expect(actual_parser).to eq described_class::Parser::RSS
150 |     end
151 | 
152 |     it "with an itunes feed it returns the RSS parser" do
153 |       xml = sample_itunes_feed
154 |       actual_parser = described_class.parser_for_xml(xml)
155 |       expect(actual_parser).to eq described_class::Parser::ITunesRSS
156 |     end
157 | 
158 |     context "when parsers are configured" do
159 |       it "does not use default parsers" do
160 |         xml = "Atom asdf"
161 |         new_parser = Class.new do
162 |           def self.able_to_parse?(_xml)
163 |             true
164 |           end
165 |         end
166 | 
167 |         described_class.configure { |config| config.parsers = [new_parser] }
168 | 
169 |         parser = described_class.parser_for_xml(xml)
170 |         expect(parser).to eq(new_parser)
171 | 
172 |         described_class.reset_configuration!
173 |       end
174 |     end
175 |   end
176 | end
177 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/itunes_feedburner.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:media="http://search.yahoo.com/mrss/" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:podcast="https://podcastindex.org/namespace/1.0" version="2.0">
  3 |   <channel>
  4 |     <title>Welcome to Night Vale</title>
  5 |     <link>http://welcometonightvale.com</link>
  6 |     <pubDate>Fri, 22 Sep 2023 16:30:15 -0000</pubDate>
  7 |     <lastBuildDate>Fri, 29 Sep 2023 15:05:26 -0000</lastBuildDate>
  8 |     <ttl>60</ttl>
  9 |     <language>en</language>
 10 |     <copyright>2016, Night Vale Presents</copyright>
 11 |     <webMaster>help@prx.org (PRX)</webMaster>
 12 |     <description>
 13 |       <![CDATA[<p>Twice-monthly community updates for the small desert town of Night Vale...</p>]]>
 14 |     </description>
 15 |     <managingEditor>info@welcometonightvale.com (info@welcometonightvale.com)</managingEditor>
 16 |     <generator>PRX Feeder v1.0.0</generator>
 17 |     <docs>http://blogs.law.harvard.edu/tech/rss</docs>
 18 |     <image>
 19 |       <url>https://f.prxu.org/126/images/bc7d203a-c0dd-46ec-956b-03bad13ba85e/nightvalelogo-web4.jpg</url>
 20 |       <title>Welcome to Night Vale</title>
 21 |       <link>http://welcometonightvale.com</link>
 22 |       <width>1400</width>
 23 |       <height>1400</height>
 24 |       <description>Twice-monthly community updates for the small desert town of Night Vale, where every conspiracy theory is true.</description>
 25 |     </image>
 26 |     <atom:link href="https://feeds.feedburner.com/welcometonightvalepodcast" rel="self" type="application/rss+xml"/>
 27 |     <itunes:author>Night Vale Presents</itunes:author>
 28 |     <itunes:type>episodic</itunes:type>
 29 |     <itunes:category text="Fiction">
 30 |       <itunes:category text="Science Fiction"/>
 31 |     </itunes:category>
 32 |     <itunes:image href="https://f.prxu.org/126/images/1f749c5d-c83a-4db9-8112-a3245da49c54/nightvalelogo-web4.jpg"/>
 33 |     <itunes:explicit>false</itunes:explicit>
 34 |     <itunes:owner>
 35 |       <itunes:email>info@welcometonightvale.com</itunes:email>
 36 |       <itunes:name>Welcome to Night Vale</itunes:name>
 37 |     </itunes:owner>
 38 |     <itunes:subtitle>Twice-monthly community updates for the small desert town of Night Vale, where every conspiracy theory is true.</itunes:subtitle>
 39 |     <itunes:summary>
 40 |       <![CDATA[Twice-monthly community updates for the small desert town of Night Vale...]]>
 41 |     </itunes:summary>
 42 |     <itunes:keywords>cecil,commonplace,cranor,fink,lovecraft,neofuturists,night,nightvale,nightvaleradio,radio,vale,welcome</itunes:keywords>
 43 |     <media:copyright>2016, Night Vale Presents</media:copyright>
 44 |     <media:thumbnail url="https://f.prxu.org/126/images/bc7d203a-c0dd-46ec-956b-03bad13ba85e/nightvalelogo-web4.jpg"/>
 45 |     <media:keywords>cecil,commonplace,cranor,fink,lovecraft,neofuturists,night,nightvale,nightvaleradio,radio,vale,welcome</media:keywords>
 46 |     <media:category scheme="http://www.itunes.com/dtds/podcast-1.0.dtd">Fiction</media:category>
 47 |     <item>
 48 |       <guid isPermaLink="false">prx_126_e3dafc45-a202-42d0-a55b-216e733a2d7a</guid>
 49 |       <title>Patreon Preview: Behind the Scenes (September 2023)</title>
 50 |       <pubDate>Fri, 22 Sep 2023 16:30:15 -0000</pubDate>
 51 |       <link>https://play.prx.org/listen?ge=prx_126_e3dafc45-a202-42d0-a55b-216e733a2d7a&amp;uf=https%3A%2F%2Ffeeds.feedburner.com%2Fwelcometonightvalepodcast</link>
 52 |       <description>
 53 |         <![CDATA[<p>An excerpt from our most recent patreon Behind the Scenes episode...</p>]]>
 54 |       </description>
 55 |       <enclosure url="https://www.podtrac.com/pts/redirect.mp3/dovetail.prxu.org/_/126/e3dafc45-a202-42d0-a55b-216e733a2d7a/2023_09_17_BTS_Episode_EXCERPT_v2.mp3" type="audio/mpeg" length="7133938"/>
 56 |       <itunes:subtitle>An excerpt from our most recent patreon Behind the Scenes episode</itunes:subtitle>
 57 |       <itunes:episodeType>full</itunes:episodeType>
 58 |       <itunes:duration>04:57</itunes:duration>
 59 |       <category>
 60 |         <![CDATA[adfree]]>
 61 |       </category>
 62 |       <itunes:author>Night Vale Presents</itunes:author>
 63 |       <itunes:summary>
 64 |         <![CDATA[An excerpt from our most recent patreon Behind the Scenes episode...]]>
 65 |       </itunes:summary>
 66 |       <itunes:image href="https://f.prxu.org/126/e3dafc45-a202-42d0-a55b-216e733a2d7a/images/6e990c53-0f6d-4806-8716-18cba0fedc8a/nightvalelogo_web4.jpg"/>
 67 |       <media:content fileSize="7133938" type="audio/mpeg" url="https://www.podtrac.com/pts/redirect.mp3/dovetail.prxu.org/_/126/e3dafc45-a202-42d0-a55b-216e733a2d7a/2023_09_17_BTS_Episode_EXCERPT_v2.mp3"/>
 68 |       <content:encoded>
 69 |         <![CDATA[<p>An excerpt from our most recent patreon Behind the Scenes episode...]]>
 70 |       </content:encoded>
 71 |     </item>
 72 |     <item>
 73 |       <guid isPermaLink="false">prx_126_e650b67a-a572-4302-81f4-6223331fa429</guid>
 74 |       <title>234 - The Boy</title>
 75 |       <pubDate>Fri, 15 Sep 2023 04:00:00 -0000</pubDate>
 76 |       <link>https://play.prx.org/listen?ge=prx_126_e650b67a-a572-4302-81f4-6223331fa429&amp;uf=https%3A%2F%2Ffeeds.feedburner.com%2Fwelcometonightvalepodcast</link>
 77 |       <description>
 78 |         <![CDATA[<p>This morning we found a boy. He has no name, and no one knows who he is...</p>]]>
 79 |       </description>
 80 |       <enclosure url="https://www.podtrac.com/pts/redirect.mp3/dovetail.prxu.org/_/126/e650b67a-a572-4302-81f4-6223331fa429/nv234_intro.mp3" type="audio/mpeg" length="33024824"/>
 81 |       <itunes:subtitle>This morning we found a boy. He has no name, and no one knows who he is.</itunes:subtitle>
 82 |       <itunes:episodeType>full</itunes:episodeType>
 83 |       <itunes:duration>22:55</itunes:duration>
 84 |       <itunes:author>Night Vale Presents</itunes:author>
 85 |       <itunes:summary>
 86 |         <![CDATA[This morning we found a boy. He has no name, and no one knows who he is...]]>
 87 |       </itunes:summary>
 88 |       <itunes:image href="https://f.prxu.org/126/e650b67a-a572-4302-81f4-6223331fa429/images/9b91ed0e-9e8c-4e4c-93f1-d4fbe8014ad0/episodes60.jpg"/>
 89 |       <media:content fileSize="33024824" type="audio/mpeg" url="https://www.podtrac.com/pts/redirect.mp3/dovetail.prxu.org/_/126/e650b67a-a572-4302-81f4-6223331fa429/nv234_intro.mp3"/>
 90 |       <content:encoded>
 91 |         <![CDATA[<p>This morning we found a boy. He has no name, and no one knows who he is.</p>]]>
 92 |       </content:encoded>
 93 |     </item>
 94 |     <item>
 95 |       <guid isPermaLink="false">prx_126_ee85d62d-87cb-4abc-8f08-b66ccdd2c103</guid>
 96 |       <title>233 - Citizen Spotlight: The Vampire of Lombardi Street</title>
 97 |       <pubDate>Fri, 01 Sep 2023 04:00:00 -0000</pubDate>
 98 |       <link>https://play.prx.org/listen?ge=prx_126_ee85d62d-87cb-4abc-8f08-b66ccdd2c103&amp;uf=https%3A%2F%2Ffeeds.feedburner.com%2Fwelcometonightvalepodcast</link>
 99 |       <description>
100 |         <![CDATA[<p>Luca Albescu lives at 831 Lombardi Street, in a dilapidated Victorian mansion at the top of the hill.</p>]]>
101 |       </description>
102 |       <enclosure url="https://www.podtrac.com/pts/redirect.mp3/dovetail.prxu.org/_/126/ee85d62d-87cb-4abc-8f08-b66ccdd2c103/nv233_intro.mp3" type="audio/mpeg" length="41761016"/>
103 |       <itunes:subtitle>Luca Albescu lives at 831 Lombardi Street, in a dilapidated Victorian mansion at the top of the hill.</itunes:subtitle>
104 |       <itunes:episodeType>full</itunes:episodeType>
105 |       <itunes:duration>28:59</itunes:duration>
106 |       <itunes:author>Night Vale Presents</itunes:author>
107 |       <itunes:summary>
108 |         <![CDATA[Luca Albescu lives at 831 Lombardi Street, in a dilapidated Victorian mansion at the top of the hill.]]>
109 |       </itunes:summary>
110 |       <itunes:image href="https://f.prxu.org/126/ee85d62d-87cb-4abc-8f08-b66ccdd2c103/images/ad5453f1-3ccc-4384-be03-9a4a7558370b/episodes58.jpg"/>
111 |       <media:content fileSize="41761016" type="audio/mpeg" url="https://www.podtrac.com/pts/redirect.mp3/dovetail.prxu.org/_/126/ee85d62d-87cb-4abc-8f08-b66ccdd2c103/nv233_intro.mp3"/>
112 |       <content:encoded>
113 |         <![CDATA[<p>Luca Albescu lives at 831 Lombardi Street, in a dilapidated Victorian mansion at the top of the hill.</p>]]>
114 |       </content:encoded>
115 |     </item>
116 |   </channel>
117 | </rss>
118 | 


--------------------------------------------------------------------------------
/spec/sample_feeds/GoogleDocsList.xml:
--------------------------------------------------------------------------------
  1 | 
  2 | <feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/"
  3 |   xmlns:docs="http://schemas.google.com/docs/2007" xmlns:batch="http://schemas.google.com/gdata/batch"
  4 |   xmlns:gd="http://schemas.google.com/g/2005" gd:etag="W/&quot;DUMFR3YyfCt7ImA9WxNTFU0.&quot;">
  5 | 
  6 | <!-- Unique identifier of this feed.  Not unique between users. -->
  7 | <id>https://docs.google.com/feeds/default/private/full</id>
  8 | 
  9 | <!-- Date this feed was last updated.  Do NOT use this, provided for Atom compliance only. -->
 10 | <updated>2009-08-17T11:10:16.894Z</updated>
 11 | 
 12 | <!-- Title of this feed result. -->
 13 | <title>Available Documents - john.smith.example@gmail.com</title>
 14 | 
 15 | <!-- Link at which a user could consume the same content given here, but in a web browser with a user interface. -->
 16 | <link rel="alternate" type="text/html" href="https://docs.google.com"/>
 17 | 
 18 | <!-- Link at which you can add documents or files using resumable upload. -->
 19 | <link rel="http://schemas.google.com/g/2005#resumable-create-media" type="application/atom+xml"
 20 |   href="https://docs.google.com/feeds/upload/create-session/default/private/full"/>
 21 | 
 22 | <!-- Link at which you can fetch the next page of results from this feed. -->
 23 | <link rel="next" type="application/atom+xml"
 24 |   href="https://docs.google.com/feeds/default/private/full?start-key=EAEaFgoSCb2YGEPMAAACAG"/>
 25 | 
 26 | <!-- Link at which you can fetch this same feed. -->
 27 | <link rel="self" type="application/atom+xml"
 28 |   href="https://docs.google.com/feeds/default/private/full/"/>
 29 | 
 30 | <!-- Link at which you can fetch this same feed. -->
 31 | <link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml"
 32 |   href="https://docs.google.com/feeds/default/private/full"/>
 33 | 
 34 | <!-- Deprecated.  Use resumable-create-media instead. -->
 35 | <!-- Link at which you can POST new entries with metadata only to this feed. -->
 36 | <link rel="http://schemas.google.com/g/2005#post" type="application/atom+xml"
 37 |   href="https://docs.google.com/feeds/default/private/full"/>
 38 | 
 39 | <!-- Link at which you can send batch requests to this feed. -->
 40 | <link rel="http://schemas.google.com/g/2005#batch" type="application/atom+xml"
 41 |   href="https://docs.google.com/feeds/default/private/full/batch"/>
 42 | 
 43 | <!-- Information about the user who authorized this request. -->
 44 | <author>
 45 |   <name>John Smith</name>
 46 |   <email>john.smith.example@gmail.com</email>
 47 | </author>
 48 | 
 49 | <!-- NOT supported, provided for protocol compliance only. -->
 50 | <openSearch:startIndex>1</openSearch:startIndex>
 51 | 
 52 | <!-- The ETag here is used to identify the version of this entry. -->
 53 | <entry gd:etag="'EVJVTBICRit7ImBq'">
 54 |   <!-- A unique, permanent identifier for this entry. -->
 55 |   <id>https://docs.google.com/feeds/id/document%3A12345</id>
 56 | 
 57 |   <!-- Title of this resource. -->
 58 |   <title>2010 Income Tax Policy</title>
 59 | 
 60 |   <!-- Description of this resource (currently visible in the preview pane in the UI). -->
 61 |   <docs:description>Describes how to file income tax for 2010.</docs:description>
 62 | 
 63 |   <!-- Resource ID of this document. -->
 64 |   <gd:resourceId>document:12345</gd:resourceId>
 65 | 
 66 |   <!-- Date this document was created (the "published" name of this element is mis-leading, but this is Atom standard). -->
 67 |   <published>2009-07-22T19:02:57.616Z</published>
 68 | 
 69 |   <!-- Information about the owner of this document (not necessarily the user authorizing this request). -->
 70 |   <author>
 71 |     <name>Jenna Dolsom</name>
 72 |     <email>jenna.dolsom.example@gmail.com</email>
 73 |   </author>
 74 | 
 75 |   <!-- Date this entry was last updated (either by Google's systems, the API, or a user in a web browser). -->
 76 |   <updated>2009-07-29T20:31:39.804Z</updated>
 77 | 
 78 |   <!-- Date this document was last edited by a user in the document editor in a web browser. -->
 79 |   <app:edited xmlns:app="http://www.w3.org/2007/app">2009-07-31T17:21:26.497Z</app:edited>
 80 | 
 81 |   <!-- Information about the user who last modified this entry (not necessarily the user authorizing this request). -->
 82 |   <gd:lastModifiedBy>
 83 |     <name>Aaron Jensen</name>
 84 |     <email>aaron.jensen.example@gmail.com</email>
 85 |   </gd:lastModifiedBy>
 86 | 
 87 |   <!-- Date this document was last viewed in a web browser by any user. -->
 88 |   <gd:lastViewed>2009-07-31T17:21:26.273Z</gd:lastViewed>
 89 | 
 90 |   <!-- The "kind" of this entry.  In this case, a word processing document. -->
 91 |   <category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/docs/2007#document" label="document"/>
 92 | 
 93 |   <!-- This entry has been viewed by the user, so it has a "viewed" category. -->
 94 |   <category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#viewed" label="viewed"/>
 95 | 
 96 |   <!-- Link at which you can download the actual document this entry describes. -->
 97 |   <content type="text/html" src="https://docs.google.com/feeds/download/documents/Export?docId=12345"/>
 98 | 
 99 |   <!-- This document is in one collection, detailed here. -->
100 |   <link rel="http://schemas.google.com/docs/2007#parent" type="application/atom+xml"
101 |     href="https://docs.google.com/feeds/default/private/full/folder%3A12345" title="ACollectionName"/>
102 | 
103 |   <!-- Link at which you can open this document in a web browser. -->
104 |   <link rel="alternate" type="text/html" href="https://docs.google.com/Doc?docid=12345&amp;hl=en"/>
105 | 
106 |   <!-- Link at which you can fetch only this entry. -->
107 |   <link rel="self" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/document%3A12345"/>
108 | 
109 |   <!-- Link at which you can PUT updates to this entry. -->
110 |   <link rel="edit" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/document%3A12345"/>
111 | 
112 |   <!-- Link at which you can PUT updates to this entry's content (deprecated, use resumable below). -->
113 |   <link rel="edit-media" type="text/html" href="https://docs.google.com/feeds/default/media/document%3A12345"/>
114 | 
115 |   <!-- Link at which you can PUT resumable updates to this entry's content. -->
116 |   <link rel="http://schemas.google.com/g/2005#resumable-edit-media" type="application/atom+xml"
117 |     href="https://docs.google.com/feeds/upload/create-session/default/private/full/document%3A12345"/>
118 | 
119 |   <!-- Link at which you can fetch a thumbnail of this resource. -->
120 |   <link rel="http://schemas.google.com/docs/2007/thumbnail" type="image/jpeg" href="https://lh3.googleusercontent.com/TQRs812345=s220"/>
121 | 
122 |   <!-- Link at which you can create, retrieve, update, and delete ACL entries for this document. -->
123 |   <gd:feedLink rel="http://schemas.google.com/acl/2007#accessControlList"
124 |     href="https://docs.google.com/feeds/default/private/full/document%3A12345/acl"/>
125 | 
126 |   <!-- Link at which you can create, retrieve, update, and delete revisions of this document. -->
127 |   <gd:feedLink rel="http://schemas.google.com/docs/2007/revisions"
128 |     href="https://docs.google.com/feeds/default/private/full/document%3A12345/revisions"/>
129 | 
130 |   <!-- Number of bytes of the owner's quota this document uses.  Native Google Docs currently use 0 bytes. -->
131 |   <gd:quotaBytesUsed>0</gd:quotaBytesUsed>
132 | 
133 |   <!-- "true" if writers can invite other users to view and edit this document. -->
134 |   <docs:writersCanInvite value="true"/>
135 | 
136 |   <!-- Given for files only. An MD5 checksum used to verify the contents of this file. -->
137 |   <!-- Some old files are being processed. Those files will not have this element yet. -->
138 |   <docs:md5Checksum>2b01142f7481c7b056c4b410d28f33cf</docs:md5Checksum>
139 | 
140 |   <!-- Original filename of file at time of upload, if available. -->
141 |   <!-- Only available for resources of type file or pdf. -->
142 |   <!-- Shown here as example only. This element is not given for resources of type document. -->
143 |   <docs:filename>MyFile.pdf</docs:filename>
144 | 
145 |   <!-- Current name of resource, with file extension from docs:filename appended, if available. -->
146 |   <!-- If the current name already has an extension, then the extension from docs:filename is not appended. -->
147 |   <!-- If docs:filename does not have an extension, then the current name is given unaltered. -->
148 |   <!-- Only available for resources of type file or pdf. -->
149 |   <!-- Shown here as example only. This element is not given for resources of type document. -->
150 |   <docs:suggestedFilename>TaxDocument.pdf</docs:suggestedFilename>
151 | </entry>
152 | <entry xmlns:gd="http://schemas.google.com/g/2005" gd:etag="'HhJSFgpeRyt7ImBq'">
153 |   <id>https://docs.google.com/feeds/id/pdf%3A12345</id>
154 |   <published>2009-04-09T18:23:09.035Z</published>
155 |   <updated>2009-04-09T18:23:09.035Z</updated>
156 |   <app:edited xmlns:app="http://www.w3.org/2007/app">2009-06-18T22:16:02.388Z</app:edited>
157 |   <category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#starred" label="starred"/>
158 |   <category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#viewed" label="viewed"/>
159 |   <category scheme="http://schemas.google.com/g/2005/labels" term="http://schemas.google.com/g/2005/labels#hidden" label="hidden"/>
160 |   <category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/docs/2007#pdf" label="pdf"/>
161 |   <title>PDF's Title</title>
162 |   <content type="application/pdf"
163 |     src="https://doc-04-20-docs.googleusercontent.com/docs/secure/m71240...U1?h=1630126&amp;e=download&amp;gd=true"/>
164 |   <link rel="alternate" type="text/html" href="https://docs.google.com/fileview?id=12345&amp;hl=en"/>
165 |   <link rel="self" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/pdf%3A12345"/>
166 |   <link rel="edit" type="application/atom+xml" href="https://docs.google.com/feeds/default/private/full/pdf%3A12345"/>
167 |   <link rel="edit-media" type="application/pdf" href="https://docs.google.com/feeds/default/media/pdf%3A12345"/>
168 |   <link rel="http://schemas.google.com/g/2005#resumable-edit-media" type="application/atom+xml"
169 |     href="https://docs.google.com/feeds/upload/create-session/default/private/full/pdf%3A12345"/>
170 |   <author>
171 |     <name>user</name>
172 |     <email>user@gmail.com</email>
173 |   </author>
174 |   <gd:resourceId>pdf:12345</gd:resourceId>
175 |   <gd:lastModifiedBy>
176 |     <name>user</name>
177 |     <email>user@gmail.com</email>
178 |   </gd:lastModifiedBy>
179 |   <gd:lastViewed>2009-06-18T22:16:02.384Z</gd:lastViewed>
180 |   <gd:quotaBytesUsed>108538</gd:quotaBytesUsed>
181 |   <docs:writersCanInvite value="false"/>
182 |   <docs:md5Checksum>2b01142f7481c7b056c4b410d28f33cf</docs:md5Checksum>
183 |   <gd:feedLink rel="http://schemas.google.com/acl/2007#accessControlList"
184 |     href="https://docs.google.com/feeds/default/private/full/pdf%3A12345/acl"/>
185 |   <gd:feedLink rel="http://schemas.google.com/docs/2007/revisions"
186 |     href="https://docs.google.com/feeds/default/private/full/document%3A12345/revisions"/>
187 | </entry>
188 | </feed>
189 | 


--------------------------------------------------------------------------------