├── LICENSE.md
├── README.md
├── awesome-feeds
    ├── FORMATS.md
    ├── METADATA.md
    ├── README.md
    └── TIMELINE.md
├── feedfilter
    ├── .gitignore
    ├── CHANGELOG.md
    ├── Manifest.txt
    ├── README.md
    ├── Rakefile
    ├── config
    │   ├── feedburner.txt
    │   └── feedflare.txt
    ├── lib
    │   ├── feedfilter.rb
    │   └── feedfilter
    │   │   ├── ads.rb
    │   │   ├── includes.rb
    │   │   └── version.rb
    └── test
    │   ├── helper.rb
    │   ├── test_ads.rb
    │   ├── test_ads_all.rb
    │   └── test_includes.rb
├── feedfinder
    ├── .gitignore
    ├── CHANGELOG.md
    ├── Manifest.txt
    ├── README.md
    ├── Rakefile
    └── lib
    │   ├── feedfinder.rb
    │   └── feedfinder
    │       └── version.rb
├── feedparser
    ├── .gitignore
    ├── CHANGELOG.md
    ├── Manifest.txt
    ├── NOTES.md
    ├── README.md
    ├── Rakefile
    ├── attic
    │   ├── atom_v03.rb
    │   ├── feed.rb
    │   ├── item.rb
    │   ├── test_atom_from_file.rb
    │   └── test_atom_v03.rb
    ├── feed-models.png
    ├── lib
    │   ├── feedparser.rb
    │   └── feedparser
    │   │   ├── attachment.rb
    │   │   ├── author.rb
    │   │   ├── builder
    │   │       ├── atom.rb
    │   │       ├── json.rb
    │   │       ├── microformats.rb
    │   │       └── rss.rb
    │   │   ├── feed.rb
    │   │   ├── generator.rb
    │   │   ├── item.rb
    │   │   ├── parser.rb
    │   │   ├── tag.rb
    │   │   ├── thumbnail.rb
    │   │   └── version.rb
    ├── sandbox
    │   ├── dumps
    │   │   ├── intertwingly.atom.xml
    │   │   ├── nostarch.rss2.xml
    │   │   └── rubyonrails.atom.xml
    │   ├── testatom.rb
    │   ├── testpp.rb
    │   └── testrss.rb
    └── test
    │   ├── helper.rb
    │   ├── media_rss_example.txt
    │   ├── test_atom_live.rb
    │   ├── test_attachments_live.rb
    │   ├── test_dates.rb
    │   ├── test_microformats.rb
    │   └── test_rss_live.rb
├── feeds
    ├── NOTES.md
    ├── README.md
    ├── Rakefile
    ├── books
    │   ├── nostarch.rss
    │   ├── oreilly.feedburner.atom
    │   └── pragprog.rss
    ├── comics
    │   ├── xkcd.atom
    │   └── xkcd.rss
    ├── misc
    │   ├── byparker.json
    │   ├── daringfireball.atom
    │   ├── daringfireball.json
    │   ├── googlegroups.atom
    │   ├── googlegroups2.atom
    │   ├── headius.atom
    │   ├── indie-blog.html
    │   ├── inessential.json
    │   ├── intertwingly.atom
    │   ├── jsonfeed.json
    │   ├── lambdatheultimate.rss
    │   ├── learnenough.feedburner.atom
    │   ├── ongoing.atom
    │   ├── railstutorial.feedburner.atom
    │   ├── rubyflow.feedburner.rss
    │   ├── rubymine.feedburner.rss
    │   ├── rubyonrails.atom
    │   ├── scripting.rss
    │   └── sitepoint.rss
    ├── news
    │   ├── guardian-facebook.rss
    │   ├── guardian-naomi-klein.rss
    │   ├── guardian-world.rss
    │   ├── nytimes-blogs-bits.rss
    │   ├── nytimes-paul-krugman.rss
    │   ├── nytimes-tech.rss
    │   ├── nytimes-thomas-l-friedman.rss
    │   ├── nytimes.rss
    │   ├── washingtonpost-blogs-innovations.rss
    │   ├── washingtonpost-politics.rss
    │   └── washingtonpost-world.rss
    ├── osm
    │   ├── blog.openstreetmap.rss
    │   ├── blogs.openstreetmap.rss
    │   └── mapbox.rss
    ├── spec
    │   ├── atom
    │   │   ├── author.atom
    │   │   ├── authors.atom
    │   │   └── categories.atom
    │   ├── json
    │   │   ├── example.json
    │   │   ├── microblog.json
    │   │   └── tags.json
    │   ├── microformats
    │   │   └── hentry.html
    │   └── rss
    │   │   ├── author.rss
    │   │   ├── categories.rss
    │   │   └── creator.rss
    └── test
    │   ├── helper.rb
    │   └── test_feeds.rb
├── feedtxt.specs
    ├── README.md
    └── _includes
    │   └── header.html
├── feedtxt
    ├── .gitignore
    ├── HISTORY.md
    ├── Manifest.txt
    ├── README.md
    ├── Rakefile
    ├── lib
    │   ├── feedtxt.rb
    │   └── feedtxt
    │   │   ├── parser.rb
    │   │   ├── parser
    │   │       ├── ini.rb
    │   │       ├── json.rb
    │   │       └── yaml.rb
    │   │   └── version.rb
    └── test
    │   ├── feeds
    │       └── spec
    │       │   ├── example.ini.txt
    │       │   ├── example.json.txt
    │       │   ├── example.yaml.txt
    │       │   ├── podcast.ini.txt
    │       │   ├── podcast.json.txt
    │       │   └── podcast.yaml.txt
    │   ├── helper.rb
    │   ├── test_ini.rb
    │   ├── test_json.rb
    │   ├── test_scanner.rb
    │   └── test_yaml.rb
└── hyperdata
    ├── .gitignore
    ├── CHANGELOG.md
    ├── Manifest.txt
    ├── README.md
    ├── Rakefile
    ├── lib
        ├── hyperdata.rb
        └── hyperdata
        │   ├── builder
        │       └── article.rb
        │   ├── feed.rb
        │   ├── item.rb
        │   ├── parser.rb
        │   └── version.rb
    ├── sandbox
        └── dumps
        │   ├── article.html.txt
        │   └── o-item.html.txt
    └── test
        ├── feeds
            └── spec
            │   ├── article.html
            │   └── o
            │       └── item.html
        ├── helper.rb
        ├── test_article.rb
        └── test_version.rb


/LICENSE.md:
--------------------------------------------------------------------------------
  1 | CC0 1.0 Universal
  2 | 
  3 | Statement of Purpose
  4 | 
  5 | The laws of most jurisdictions throughout the world automatically confer
  6 | exclusive Copyright and Related Rights (defined below) upon the creator and
  7 | subsequent owner(s) (each and all, an "owner") of an original work of
  8 | authorship and/or a database (each, a "Work").
  9 | 
 10 | Certain owners wish to permanently relinquish those rights to a Work for the
 11 | purpose of contributing to a commons of creative, cultural and scientific
 12 | works ("Commons") that the public can reliably and without fear of later
 13 | claims of infringement build upon, modify, incorporate in other works, reuse
 14 | and redistribute as freely as possible in any form whatsoever and for any
 15 | purposes, including without limitation commercial purposes. These owners may
 16 | contribute to the Commons to promote the ideal of a free culture and the
 17 | further production of creative, cultural and scientific works, or to gain
 18 | reputation or greater distribution for their Work in part through the use and
 19 | efforts of others.
 20 | 
 21 | For these and/or other purposes and motivations, and without any expectation
 22 | of additional consideration or compensation, the person associating CC0 with a
 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
 25 | and publicly distribute the Work under its terms, with knowledge of his or her
 26 | Copyright and Related Rights in the Work and the meaning and intended legal
 27 | effect of CC0 on those rights.
 28 | 
 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be
 30 | protected by copyright and related or neighboring rights ("Copyright and
 31 | Related Rights"). Copyright and Related Rights include, but are not limited
 32 | to, the following:
 33 | 
 34 |   i. the right to reproduce, adapt, distribute, perform, display, communicate,
 35 |   and translate a Work;
 36 | 
 37 |   ii. moral rights retained by the original author(s) and/or performer(s);
 38 | 
 39 |   iii. publicity and privacy rights pertaining to a person's image or likeness
 40 |   depicted in a Work;
 41 | 
 42 |   iv. rights protecting against unfair competition in regards to a Work,
 43 |   subject to the limitations in paragraph 4(a), below;
 44 | 
 45 |   v. rights protecting the extraction, dissemination, use and reuse of data in
 46 |   a Work;
 47 | 
 48 |   vi. database rights (such as those arising under Directive 96/9/EC of the
 49 |   European Parliament and of the Council of 11 March 1996 on the legal
 50 |   protection of databases, and under any national implementation thereof,
 51 |   including any amended or successor version of such directive); and
 52 | 
 53 |   vii. other similar, equivalent or corresponding rights throughout the world
 54 |   based on applicable law or treaty, and any national implementations thereof.
 55 | 
 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of,
 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
 59 | and Related Rights and associated claims and causes of action, whether now
 60 | known or unknown (including existing as well as future claims and causes of
 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum
 62 | duration provided by applicable law or treaty (including future time
 63 | extensions), (iii) in any current or future medium and for any number of
 64 | copies, and (iv) for any purpose whatsoever, including without limitation
 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
 66 | the Waiver for the benefit of each member of the public at large and to the
 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver
 68 | shall not be subject to revocation, rescission, cancellation, termination, or
 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work
 70 | by the public as contemplated by Affirmer's express Statement of Purpose.
 71 | 
 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be
 73 | judged legally invalid or ineffective under applicable law, then the Waiver
 74 | shall be preserved to the maximum extent permitted taking into account
 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
 76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
 77 | non transferable, non sublicensable, non exclusive, irrevocable and
 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
 80 | provided by applicable law or treaty (including future time extensions), (iii)
 81 | in any current or future medium and for any number of copies, and (iv) for any
 82 | purpose whatsoever, including without limitation commercial, advertising or
 83 | promotional purposes (the "License"). The License shall be deemed effective as
 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
 85 | License for any reason be judged legally invalid or ineffective under
 86 | applicable law, such partial invalidity or ineffectiveness shall not
 87 | invalidate the remainder of the License, and in such case Affirmer hereby
 88 | affirms that he or she will not (i) exercise any of his or her remaining
 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
 90 | and causes of action with respect to the Work, in either case contrary to
 91 | Affirmer's express Statement of Purpose.
 92 | 
 93 | 4. Limitations and Disclaimers.
 94 | 
 95 |   a. No trademark or patent rights held by Affirmer are waived, abandoned,
 96 |   surrendered, licensed or otherwise affected by this document.
 97 | 
 98 |   b. Affirmer offers the Work as-is and makes no representations or warranties
 99 |   of any kind concerning the Work, express, implied, statutory or otherwise,
100 |   including without limitation warranties of title, merchantability, fitness
101 |   for a particular purpose, non infringement, or the absence of latent or
102 |   other defects, accuracy, or the present or absence of errors, whether or not
103 |   discoverable, all to the greatest extent permissible under applicable law.
104 | 
105 |   c. Affirmer disclaims responsibility for clearing rights of other persons
106 |   that may apply to the Work or any use thereof, including without limitation
107 |   any person's Copyright and Related Rights in the Work. Further, Affirmer
108 |   disclaims responsibility for obtaining any necessary consents, permissions
109 |   or other rights required for any use of the Work.
110 | 
111 |   d. Affirmer understands and acknowledges that Creative Commons is not a
112 |   party to this document and has no duty or obligation with respect to this
113 |   CC0 or use of the Work.
114 | 
115 | For more information, please see
116 | <http://creativecommons.org/publicdomain/zero/1.0/>
117 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # (Universal) Web Feed Parser And Normalizer (Atom, RSS, JSON Feed, Feed.TXT, Feed.HTML, HTML w/ Microformats e.g. h-entry, Etc.) And More
 2 | 
 3 | 
 4 | Gem Family
 5 | 
 6 | [**feedparser**](feedparser) - web feed parser and normalizer (Atom, RSS, JSON Feed, HTML h-entry, etc.)
 7 | 
 8 | [**feedfilter**](feedfilter) - feed filter and rules for easy (re)use
 9 | 
10 | [**feedtxt**](feedtxt)  - reads Feed.TXT - feeds in text (unicode) - publish & share posts, articles, podcasts, 'n' more
11 | 
12 | 
13 | 
14 | 
15 | More
16 | 
17 | [**feeds**](feeds) - tests, tests, tests - feeds (in Atom, RSS, JSON Feed, HTML h-entry, etc.) with test assertions
18 | 
19 | [**awesome-feeds**](awesome-feeds) - a collection of awesome feeds (formats, readers, services & tools, templates 'n' more) - JSON Feed, RSS, Atom, HTML w/ Microformats, Feed.TXT
20 | 
21 | 


--------------------------------------------------------------------------------
/awesome-feeds/FORMATS.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Awesome Feeds > Formats
  3 | 
  4 | 
  5 | Note: (†) used for historic formats (no longer in use or obsolete or were just experimental)
  6 | 
  7 | 
  8 | ## RSS "Family"
  9 | 
 10 | 
 11 | ### RSS 0.9 (†)  (RDF Site Summary) 
 12 | 
 13 | by Netscape
 14 | 
 15 | 
 16 | ### RDF / RSS 1.0 (†) (RDF Site Summary)
 17 | 
 18 | by rdf-dev-group 
 19 | 
 20 | 
 21 | ### RSS 2.0, 0.92 (†), 0.91 (†)   (Really Simple Syndication)
 22 | 
 23 | by Dave Winer (DW) - formerly UserLand
 24 | 
 25 | #### RSS.js / RSS.json / RSS-in-JSON / RSS-in-JS by Dave Winer
 26 | 
 27 | See [`rssjs.org`](http://rssjs.org).
 28 | 
 29 | - [RSS-in-JSON is a feed format](https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md) by Dave Winer, June 2017
 30 | 
 31 | 
 32 | ### RSS 3.0 (†)
 33 | 
 34 | by Aaron Swartz
 35 | 
 36 | See [RSS 3.0](http://www.aaronsw.com/2002/rss30), 2002
 37 | 
 38 | - [The Road to RSS 3.0](http://www.aaronsw.com/weblog/000574) by Aaron Swartz, September 2002
 39 | 
 40 | Plain Text with key value pairs and multi-line values with indentation.
 41 | 
 42 | 
 43 | 
 44 | ### RSS 5.0   (Really Simple, Stupid or Really Simple Sharing)
 45 | 
 46 | See Feed.TXT
 47 | 
 48 | 
 49 | ### More RSS
 50 | 
 51 | - [RSS @ Wikipedia](https://en.wikipedia.org/wiki/RSS)
 52 | 
 53 | 
 54 | ## Atom
 55 | 
 56 | - [Atom (standard) @ Wikipedia](https://en.wikipedia.org/wiki/Atom_(standard))
 57 | 
 58 | 
 59 | 
 60 | ## JSON Formats
 61 | 
 62 | ### JSON Feed
 63 | 
 64 | See [`jsonfeed.org`](https://jsonfeed.org).
 65 | 
 66 | ### Activity Streams
 67 | 
 68 | See [`activitystrea.ms`](http://activitystrea.ms)
 69 | 
 70 | - [Activity_Streams (format) @ Wikipedia](https://en.wikipedia.org/wiki/Activity_Streams_(format))
 71 | 
 72 | ### Collection+JSON
 73 | 
 74 | See [Collection+JSON - Hypermedia Type](http://amundsen.com/media-types/collection/)
 75 | 
 76 | Collection+JSON is a JSON-based read/write hypermedia-type designed to support management and querying of simple collections. It is similar to the Atom Syndication Format (RFC4287) and the Atom Publishing Protocol (RFC5023). However, Collection+JSON defines both the format and the semantics in a single media type. It also includes support for Query Templates and expanded write support through the use of a Write Template.
 77 | 
 78 | 
 79 | 
 80 | 
 81 | ## YAML Feed (†)
 82 | 
 83 | - [YAMLFeed @ Indie Web](https://indieweb.org/YAMLFeed) (twitter: [yamlfeed](https://twitter.com/yamlfeed)) - Launched as a "practical" lulz joke - keep the format wars (e.g. rss vs atom) alive (e.g. now json vs yaml).
 84 | 
 85 | 
 86 | ## Microformats
 87 | 
 88 | ### Microformats V1  hentry/hatom
 89 | 
 90 | ### Microformats V2  h-entry/h-feed
 91 | 
 92 | 
 93 | 
 94 | ## Feed.TXT
 95 | 
 96 | See [Feed.TXT](https://feedtxt.github.io).
 97 | 
 98 | A Free Feeds Format in Plain Text w/ Structured Meta Data
 99 | 
100 | 
101 | 
102 | 
103 | ## More
104 | 
105 | 


--------------------------------------------------------------------------------
/awesome-feeds/METADATA.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Awesome Feeds > Meta Data
  3 | 
  4 | How many ways to add
  5 | 
  6 | - Author
  7 | - Title
  8 | - Date 
  9 | 
 10 | Let's count ;-)
 11 | 
 12 | 
 13 | 
 14 | ## Person / People
 15 | 
 16 | - **creator**            -- Dublin Core Meta Data
 17 | - **publisher**          -- Dublin Core Meta Data
 18 | - **author**             -- RSS 2.0, Atom, JSON Feed
 19 | - **contributor**        -- Atom
 20 | - **managingEditor**     -- RSS 2.0 Channel
 21 | - **webMaster**          -- RSS 2.0 Channel
 22 | 
 23 | 
 24 | ## Dates
 25 | 
 26 | - **published**         -- Atom
 27 | - **pubDate**           -- RSS 2.0
 28 | - **date_published**    -- JSON Feed
 29 | - **date**              -- Dublin Core Meta Data
 30 | - **updated**           -- Atom
 31 | - **date_modified**     -- JSON Feed
 32 | - **lastBuildDate**     -- RSS 2.0 Channel
 33 | 
 34 | 
 35 | ## Title
 36 | 
 37 | - **title**             -- Atom / RSS 2.0 / JSON Feed
 38 | - **name**
 39 | 
 40 | 
 41 | _2nd Level Title_
 42 | 
 43 | - **subtitle**          -- Atom  
 44 | - **tagline**
 45 | 
 46 | 
 47 | ## Summary
 48 | 
 49 | - **summary**          -- Atom / JSON Feed
 50 | - **description**      -- RSS 2.0
 51 | - **abstract**    
 52 | - **excerpt**
 53 | 
 54 | 
 55 | ## Content
 56 | 
 57 | - **content**          -- Atom (Defaults to Text!), RSS Yahoo! Search (Media) Extension 
 58 | - **content type="text|html|xhtml"**   -- Atom (Defaults to Text!)
 59 | - **content_text**     -- JSON Feed
 60 | - **content_html**     -- JSON Feed
 61 | - **content:encoded**  -- RDF Content Module
 62 | 
 63 | 
 64 | 
 65 | ## Tags / Categories
 66 | 
 67 | - **category**   -- RSS 2.0
 68 | - **category term=**  -- Atom
 69 | - **tags[]**     -- JSON Feed
 70 | - **keywords**
 71 | 
 72 | _Scheme_
 73 | 
 74 | - **scheme**     -- Atom
 75 | - **domain**     -- RSS 2.0
 76 | 
 77 | 
 78 | ## Link
 79 | 
 80 | - **url**      -- JSON Feed
 81 | - **link**     -- RSS 2.0
 82 | - **link href=**   -- Atom 
 83 | 
 84 | 
 85 | _More Links_
 86 | 
 87 | - **home_page_url**   -- JSON Feed (site url)
 88 | - **feed_url**        -- JSON Feed (feed url)
 89 | - **link href= rel="self"**        -- Atom (feed url)
 90 | - **link href= rel="alternate"**   -- Atom (site url)
 91 | 
 92 | 
 93 | ## ID
 94 | 
 95 | - **id**      -- Atom, JSON Feed
 96 | - **guid**    -- RSS 2.0
 97 | - **permalink**
 98 | 
 99 | 
100 | ## Attachments
101 | 
102 | - **attachments[] url=**            -- JSON Feed
103 | - **enclosure url=**              -- RSS 2.0
104 | - **link href= rel="enclosure"**   -- Atom
105 | 
106 | _Examples_
107 | 
108 | JSON Feed:
109 | 
110 | ``` json
111 | "attachments": [
112 |                 {
113 |                     "url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
114 |                     "mime_type": "audio/x-m4a",
115 |                     "size_in_bytes": 89970236,
116 |                     "duration_in_seconds": 6629
117 |                 }
118 |             ]
119 | ```
120 | 
121 | RSS 2.0:
122 | 
123 | ``` xml
124 | <enclosure url="http://www.example.org/myaudiofile.mp3"
125 |                  length="12345"
126 |                  type="audio/mpeg" />
127 | ```
128 | 
129 | Atom:
130 | 
131 | ``` xml
132 |    <link rel="enclosure"
133 |           type="audio/mpeg"
134 |           title="MP3"
135 |           href="http://www.example.org/myaudiofile.mp3"
136 |           length="1234" />
137 |     <link rel="enclosure"
138 |           type="application/x-bittorrent"
139 |           title="BitTorrent"
140 |           href="http://www.example.org/myaudiofile.torrent"
141 |           length="1234" />
142 | ```
143 | 
144 | 
145 | ## More - What's Missing?
146 | 
147 | - add banner image for item / entry?
148 | - add image / cover for feed / channel?
149 | - add (fav)icon for feed / channel?
150 | - add language ?
151 | - add expired yes/no or with date?
152 | 
153 | 


--------------------------------------------------------------------------------
/awesome-feeds/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | JSON Feed • RSS • Atom • HTML w/ Microformats • Feed.TXT 
  3 | 
  4 | 
  5 | # Awesome Feeds
  6 | 
  7 | A collection of awesome feeds (formats, readers, services & tools, templates 'n' more).
  8 | 
  9 | #### _Contributions welcome. Anything missing? Send in a pull request. Thanks._
 10 | 
 11 | 
 12 | 
 13 | [Formats](#formats) •
 14 | [Parsers](#parsers) •
 15 | [Filters](#filters) •
 16 | [Converters](#converters) •
 17 | [Readers](#readers) •
 18 | [Facebook & Co](#facebook--co)
 19 | 
 20 | 
 21 | 
 22 | ## Formats
 23 | 
 24 | _JSON, XML, HTML, YAML, TXT & Friends_
 25 | 
 26 | [JSON Feed](#json-feed) •
 27 | [Atom](#atom) •
 28 | [RSS](#rss) •
 29 | [HTML Microformats](#html-microformats) •
 30 | [Feed.TXT](#feedtxt)
 31 | 
 32 | 
 33 | ### JSON Feed
 34 | 
 35 | - [**jsonfeed.org**](https://jsonfeed.org) (twitter: [jsonfeed](https://twitter.com/jsonfeed)) - JSON Feed Project Site
 36 |   - [**Spec V1 @ JSON Feed**](https://jsonfeed.org/version/1) - Official Specification Version 1.0 - in plain English (yeah!); May 2017
 37 |   - [**Code @ JSON Feed**](https://jsonfeed.org/code) - JSON Feed Templates, Plugins, Parser, Scripts & More
 38 | 
 39 | 
 40 | **More**
 41 | 
 42 | - [**Awesome JSON Feed**](https://github.com/rmlewisuk/awesome-json-feed)
 43 | 
 44 | 
 45 | ### Atom
 46 | 
 47 | _Atom Syndication & Friends_
 48 | 
 49 | - [**Atom Syndication Spec**](https://tools.ietf.org/html/rfc4287), December 2005
 50 | 
 51 | 
 52 | ### RSS
 53 | 
 54 | _Really Simple Syndication & Friends_
 55 | 
 56 | - [**RSS 2.0 Spec**](http://cyber.harvard.edu/rss/rss.html),  July 2003
 57 |   
 58 | 
 59 | ### HTML Microformats
 60 | 
 61 | - [**h-feed Living Spec**](http://microformats.org/wiki/h-feed)
 62 | - [**h-entry Living Spec**](http://microformats.org/wiki/h-entry)  
 63 | 
 64 | 
 65 | ### Feed.TXT
 66 | 
 67 | - [**Feed.TXT**](https://feedtxt.github.io) (github: [feedtxt](https://github.com/feedtxt)) - Feed.TXT Project Site
 68 | 
 69 | 
 70 | 
 71 | ## Parsers
 72 | 
 73 | _Universal Feed Parser & Normalizer_
 74 | 
 75 | **Ruby**
 76 | 
 77 | - [**feedparser**](https://github.com/feedparser/feedparser) (gem: [feedparser](https://rubygems.org/gems/feedparser)) - universal feed parser and normalizer (supports Atom, RSS, JSON, HTML, TXT etc.)
 78 | 
 79 | 
 80 | 
 81 | ## Filters
 82 | 
 83 | **Ruby**
 84 | 
 85 | - [**feedfilter**](https://github.com/feedparser/feedfilter) (gem: [feedfilter](https://rubygems.org/gems/feedfilter)) - feed filter and rules for easy (re)use e.g. strip ads etc.
 86 | 
 87 | 
 88 | 
 89 | ## Converters
 90 | 
 91 | - [**feed2json**](https://feed2json.org) (github: [appsattic/feed2json.org](https://github.com/appsattic/feed2json.org)) - convert rss or atom to json feed
 92 | 
 93 | 
 94 | ## Readers
 95 | 
 96 | **JavaScript**
 97 | 
 98 | - [**JSON Feed Viewer**](https://json-feed-viewer.herokuapp.com) (github: [maximevaillancourt/json-feed-viewer](https://github.com/maximevaillancourt/json-feed-viewer)) - browse through the showcased feeds, or enter a feed url
 99 | - [**feeds React Sample**](https://github.com/playhtml/feeds/tree/master/react)  - simple feeds news reader sample w/ React and JSON feed
100 | 
101 | ## Facebook & Co
102 | 
103 | _Let's build the next thousand Facebook & Co news feeds and readers. Join the free & open web and read & share your posts & updates with feeds._
104 | 
105 | - [**Fuck Facebook**](https://daringfireball.net/2017/06/fuck_facebook) by John Gruber, June 2017 
106 | - [**Why I can't/won't point to Facebook blog posts**](http://scripting.com/2017/05/31.html#a110526) by Dave Winer, May 2017 
107 | 
108 | 
109 | 
110 | ## Open Web & Friends
111 | 
112 | _What's the open web? Why care about the future of online news & publishing?_
113 | 
114 | > Seriously guys, nobody gives a shit about the open web. Only your clique.
115 | >
116 | >  -- [Joe Hewitt, June 2017](https://twitter.com/joehewitt/status/870363197580038144)
117 | 
118 | - [**Introducing AltPlatform & our manifesto for the Open Web**](http://altplatform.org/2017/05/30/open-web-manifesto/) by Richard MacManus, May 2017
119 | 
120 | 
121 | ## Meta
122 | 
123 | **License**
124 | 
125 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png)
126 | 
127 | The awesome list is dedicated to the public domain. Use it as you please with no restrictions whatsoever.
128 | 
129 | **Questions? Comments?**
130 | 
131 | Post them to the [wwwmake forum](http://groups.google.com/group/wwwmake). Thanks!
132 | 


--------------------------------------------------------------------------------
/awesome-feeds/TIMELINE.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # Awesome Feeds > History > Timeline
 4 | 
 5 | 
 6 | ## 2017
 7 | 
 8 | **June**  Feed.TXT introduced.
 9 | 
10 | RSS-in-JS (re)launched by Dave Winer.
11 | 
12 | 
13 | **May**  JSON Feed introduced by Manton Reece and Brent Simmons.
14 | 
15 | 
16 | 
17 | ## 2004
18 | 
19 | ## 2003
20 | 
21 | ## 2002
22 | 
23 | RSS 3.0 introduced by Aaron Swartz. Removed XML, namespaces, etc.
24 | 
25 | 
26 | ## 2000
27 | 
28 | 
29 | ## 1996
30 | 
31 | Meta Content Framework (MCF) developed by Ramanathan V. Guha and others in Apple Computer's Advanced Technology Group.
32 | 


--------------------------------------------------------------------------------
/feedfilter/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | .bundle
 4 | .config
 5 | coverage
 6 | InstalledFiles
 7 | lib/bundler/man
 8 | pkg
 9 | rdoc
10 | spec/reports
11 | test/tmp
12 | test/version_tmp
13 | tmp
14 | 
15 | # YARD artifacts
16 | .yardoc
17 | _yardoc
18 | doc/
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/feedfilter/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | 
2 | ### 0.0.1 / 2015-01-08
3 | 
4 | * Everything is new. First release
5 | 
6 | 


--------------------------------------------------------------------------------
/feedfilter/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | config/feedburner.txt
 6 | config/feedflare.txt
 7 | lib/feedfilter.rb
 8 | lib/feedfilter/ads.rb
 9 | lib/feedfilter/includes.rb
10 | lib/feedfilter/version.rb
11 | test/helper.rb
12 | test/test_ads.rb
13 | test/test_ads_all.rb
14 | test/test_includes.rb
15 | 


--------------------------------------------------------------------------------
/feedfilter/README.md:
--------------------------------------------------------------------------------
 1 | # feedfilter gem - feed filter and rules for easy (re)use 
 2 | 
 3 | * home  :: [github.com/feedparser/feedfilter](https://github.com/feedparser/feedfilter)
 4 | * bugs  :: [github.com/feedparser/feedfilter/issues](https://github.com/feedparser/feedfilter/issues)
 5 | * gem   :: [rubygems.org/gems/feedfilter](https://rubygems.org/gems/feedfilter)
 6 | * rdoc  :: [rubydoc.info/gems/feedfilter](http://rubydoc.info/gems/feedfilter)
 7 | * forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake)
 8 | 
 9 | 
10 | ## Usage
11 | 
12 | 
13 | ### `strip_ads`  (in `AdsFilter` module)
14 | 
15 | ```
16 | require 'feedfilter'
17 | 
18 | include FeedFilter::AdsFilter      # lets us use strip_ads
19 | 
20 | 
21 | before_snippet =<<EOS
22 | <div class="feedflare">
23 |  <a href="http://feeds.feedburner.com/~ff/Rubyflow?a=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U">
24 |    <img src="http://feeds.feedburner.com/~ff/Rubyflow?i=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U" border="0"></img>
25 |  </a>
26 | </div>
27 | EOS
28 | 
29 | 
30 | snippet = strip_ads( before_snippet )
31 | 
32 | puts snippet
33 | ```
34 | 
35 | 
36 | ### Use Text Patterns (Regex) for Filters
37 | 
38 | Ads Example:
39 | 
40 | ```
41 | FEEDFLARE_ADS = %r{
42 |      <div[^>]*?
43 |         class=("|')feedflare\1
44 |         [^>]*?>
45 |           .*?
46 |      <\/div>
47 |        }mix
48 | 
49 | FEEDBURNER_BUGS = %r{
50 |       <img[^>]*?
51 |          src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1
52 |          .*?>
53 |        }mix
54 | 
55 | ...
56 | ```
57 | 
58 | or as one-liners (if you prefer)
59 | 
60 | ```
61 | FEEDFLARE_ADS   = %r{<div[^>]*?class=("|')feedflare\1[^>]*?>.*?<\/div>}mi
62 | FEEDBURNER_BUGS = %r{<img[^>]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>}mi
63 | ...
64 | ```
65 | 
66 | 
67 | ## License
68 | 
69 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png)
70 | 
71 | The `feedfilter` scripts are dedicated to the public domain.
72 | Use them as you please with no restrictions whatsoever.
73 | 
74 | ## Questions? Comments?
75 | 
76 | Send them along to the [wwwmake Forum/Mailing List](http://groups.google.com/group/wwwmake).
77 | Thanks!
78 | 


--------------------------------------------------------------------------------
/feedfilter/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/feedfilter/version.rb'
 3 | 
 4 | Hoe.spec 'feedfilter' do
 5 | 
 6 |   self.version = FeedFilter::VERSION
 7 | 
 8 |   self.summary = "feedfilter - feed filter and rules for easy (re)use"
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/feedreader/feed.filter']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'feedreader@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for github formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |     ['textutils', '>=1.0.1'],
22 |   ]
23 | 
24 |   self.licenses = ['Public Domain']
25 | 
26 |   self.spec_extras = {
27 |     required_ruby_version: '>= 1.9.2'
28 |   }
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/feedfilter/config/feedburner.txt:
--------------------------------------------------------------------------------
 1 | ####################################
 2 | # feedburner text pattern (regex)
 3 | #
 4 | #  pattern (regex)
 5 | #  ---
 6 | #  test1
 7 | #  ---
 8 | #  test2
 9 | #  ---
10 | #  etc.
11 | 
12 | 
13 | <img[^>]*?
14 |   src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1
15 |   .*?>
16 | 
17 | ---
18 | 
19 | <img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/feedfilter/config/feedflare.txt:
--------------------------------------------------------------------------------
 1 | ###################################
 2 | # feedflare text pattern (regex)
 3 | 
 4 | <div[^>]*?
 5 |   class=("|')feedflare\1
 6 |   [^>]*?>
 7 |     .*?
 8 | <\/div>
 9 | 
10 | ---
11 | 
12 | <div class="feedflare">
13 |  <a href="http://feeds.feedburner.com/~ff/Rubyflow?a=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U">
14 |    <img src="http://feeds.feedburner.com/~ff/Rubyflow?i=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U" border="0"></img>
15 |  </a>
16 | </div>
17 | 
18 | 


--------------------------------------------------------------------------------
/feedfilter/lib/feedfilter.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | # core and stdlibs
 5 | 
 6 | 
 7 | # 3rd party gems/libs
 8 | 
 9 | require 'textutils'
10 | 
11 | # our own code
12 | 
13 | require 'feedfilter/version'  # let it always go first
14 | require 'feedfilter/ads'
15 | require 'feedfilter/includes'
16 | 
17 | 
18 | # say hello
19 | puts FeedFilter.banner     if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
20 | 
21 | 


--------------------------------------------------------------------------------
/feedfilter/lib/feedfilter/ads.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | module FeedFilter
 5 | 
 6 | 
 7 | class AdsFilters
 8 | 
 9 |   include LogUtils::Logging
10 | 
11 |   def initialize
12 |     @filters=[]
13 | 
14 |     names=[
15 |       'feedburner',
16 |       'feedflare'
17 |     ]
18 | 
19 |     names.each do |name|
20 |       logger.debug "  add ads filter #{name}"
21 | 
22 |       b = BlockReader.from_file( "#{FeedFilter.root}/config/#{name}.txt").read
23 |       ## Note: replace newline and space in string for regex (w/o spaces)
24 |       ## Note: add multiline option and ignore case
25 |       regexp = Regexp.new( b[0].gsub( /[\n ]/, '' ), Regexp::MULTILINE|Regexp::IGNORECASE )
26 |       @filters << [name, regexp]
27 |     end
28 |   end
29 | 
30 |   def filter( text )
31 |     @filters.each do |f|
32 |       name     = f[0]
33 |       pattern  = f[1]
34 |       
35 |       text = text.gsub( pattern ) do |m|
36 |         # Note: m - match is just a regular string
37 |         ##  double check if it's true also if regex contains capture groups ???
38 |         puts "strip #{name}:"
39 |         pp m
40 |         ''
41 |       end
42 |     end # each filter
43 |     text
44 |   end  # filter
45 | 
46 | end # AdsFilters
47 | 
48 | 
49 |   def self.strip_ads( text )  
50 |     @@ads_filters ||= FeedFilter::AdsFilters.new
51 |     @@ads_filters.filter( text )
52 |   end
53 | 
54 | 
55 |   module AdsFilter
56 |     def strip_ads( text )
57 |       FeedFilter.strip_ads( text )
58 |     end
59 |   end # module AdsFilter
60 | 
61 | end # module FeedFilter
62 | 
63 | 


--------------------------------------------------------------------------------
/feedfilter/lib/feedfilter/includes.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | module FeedFilter
 5 | 
 6 | 
 7 | class IncludeFilters
 8 | 
 9 |   include LogUtils::Logging
10 | 
11 |   def initialize( includes )
12 |     @includes = includes
13 | 
14 |     ## split terms (allow comma,pipe) - do NOT use space; allows e.g. terms such as github pages
15 |     @terms = includes.split( /\s*[,|]\s*/ )
16 |     ## remove leading and trailing white spaces - check - still required when using \s* ??
17 |     @terms = @terms.map { |term| term.strip }
18 |   end
19 | 
20 | 
21 |   def match_item?( item )
22 |     match_terms?( item.title   ) ||
23 |     match_terms?( item.summary ) ||
24 |     match_terms?( item.content )
25 |   end
26 | 
27 | private
28 | 
29 |   def match_terms?( text )   ### make helper method private - why? why not??
30 |     return false  if text.nil? || text.empty?     ## allow/guard against nil and empty string (use blank?)
31 | 
32 |     @terms.each do |term|
33 |       if /#{term}/i =~ text      ## Note: lets ignore case (use i regex option) 
34 |         return true
35 |       end
36 |     end
37 | 
38 |     false  # no term match found
39 |   end
40 | 
41 | end # class IncludeFilters
42 | 
43 | end # module FeedFilter
44 | 


--------------------------------------------------------------------------------
/feedfilter/lib/feedfilter/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedFilter
 4 | 
 5 |   MAJOR = 1
 6 |   MINOR = 1
 7 |   PATCH = 1
 8 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
 9 | 
10 |   def self.version
11 |     VERSION
12 |   end
13 | 
14 |   def self.banner
15 |     "feedfilter/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16 |   end
17 | 
18 |   def self.root
19 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20 |   end
21 | 
22 | end # module FeedFilter
23 | 
24 | 


--------------------------------------------------------------------------------
/feedfilter/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | 
 4 | ## minitest setup
 5 | 
 6 | require 'minitest/autorun'
 7 | 
 8 | 
 9 | ## our own code
10 | 
11 | require 'feedfilter'
12 | 
13 | LogUtils::Logger.root.level = :debug
14 | 
15 | 


--------------------------------------------------------------------------------
/feedfilter/test/test_ads.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_ads.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
## Checks that strip_ads removes the feedflare block and the feedburner tracking image.
##  Note: uses Minitest::Test - the old MiniTest alias is no longer defined by default
class TestAds < Minitest::Test

  include FeedFilter::AdsFilter


  def test_feedflare_ads
     text =<<EOS
<div class="feedflare">
 <a href="http://feeds.feedburner.com/~ff/Rubyflow?a=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U">
   <img src="http://feeds.feedburner.com/~ff/Rubyflow?i=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U" border="0"></img>
 </a>
</div>
EOS
     text = strip_ads( text ).strip

     assert_equal '', text
  end


  def test_feedburner_bugs
     text =<<EOS
<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
EOS
     text = strip_ads( text ).strip

     assert_equal '', text
  end

end # class TestAds
39 | 


--------------------------------------------------------------------------------
/feedfilter/test/test_ads_all.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_ads_all.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
## Runs every bundled ad pattern against the first sample block
##  stored in the same config file (block 0 = pattern, block 1 = sample).
##  Note: uses Minitest::Test - the old MiniTest alias is no longer defined by default
class TestAdsAll < Minitest::Test

  def test_all
    names=[
      'feedburner',
      'feedflare'
    ]

    names.each do |name|
      b = BlockReader.from_file( "#{FeedFilter.root}/config/#{name}.txt").read
      ## Note: replace newline and space in string for regex (w/o spaces)
      ## Note: add multiline option and ignore case
      regexp = Regexp.new( b[0].gsub( /[\n ]/, '' ), Regexp::MULTILINE|Regexp::IGNORECASE )
      test1  = b[1]

      assert_equal '', test1.gsub( regexp, '' ).strip
    end
  end

end # class TestAdsAll
30 | 


--------------------------------------------------------------------------------
/feedfilter/test/test_includes.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_includes.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | TestItem = Struct.new( :title, :summary, :content )
11 | 
## Checks that IncludeFilters matches items by term (ignoring case).
##  Note: uses Minitest::Test - the old MiniTest alias is no longer defined by default
class TestIncludes < Minitest::Test

  def test_item
    includes_filter = FeedFilter::IncludeFilters.new( 'github pages|jekyll' )

    ## no term in title, summary or content
    item1 = TestItem.new
    item1.title   = 'title'
    item1.summary = 'summary'
    item1.content = 'content'

    ## term (in upcase) in content
    item2 = TestItem.new
    item2.title   = 'title'
    item2.summary = 'summary'
    item2.content = 'bla bla JEKYLL bla bla'

    assert_equal false, includes_filter.match_item?( item1 )
    assert_equal true,  includes_filter.match_item?( item2 )
  end

end # class TestIncludes
32 | 
33 | 


--------------------------------------------------------------------------------
/feedfinder/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | .bundle
 4 | .config
 5 | coverage
 6 | InstalledFiles
 7 | lib/bundler/man
 8 | pkg
 9 | rdoc
10 | spec/reports
11 | test/tmp
12 | test/version_tmp
13 | tmp
14 | 
15 | # YARD artifacts
16 | .yardoc
17 | _yardoc
18 | doc/
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/feedfinder/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | 
2 | ### 0.0.1 / 2017-06-05
3 | 
4 | * Everything is new. First release
5 | 


--------------------------------------------------------------------------------
/feedfinder/Manifest.txt:
--------------------------------------------------------------------------------
1 | CHANGELOG.md
2 | Manifest.txt
3 | README.md
4 | Rakefile
5 | lib/feedfinder.rb
6 | lib/feedfinder/version.rb
7 | 


--------------------------------------------------------------------------------
/feedfinder/README.md:
--------------------------------------------------------------------------------
 1 | # feedfinder gem - web feed finder and discovery (RSS, Atom, JSON Feed, etc.)
 2 | 
 3 | * home  :: [github.com/feedparser/feedfinder](https://github.com/feedparser/feedfinder)
 4 | * bugs  :: [github.com/feedparser/feedfinder/issues](https://github.com/feedparser/feedfinder/issues)
 5 | * gem   :: [rubygems.org/gems/feedfinder](https://rubygems.org/gems/feedfinder)
 6 | * rdoc  :: [rubydoc.info/gems/feedfinder](http://rubydoc.info/gems/feedfinder)
 7 | * forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake)
 8 | 
 9 | 
10 | ## Usage
11 | 
12 | To be done.
13 | 
14 | 
15 | ## License
16 | 
17 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png)
18 | 
19 | The `feedfinder` scripts are dedicated to the public domain.
20 | Use it as you please with no restrictions whatsoever.
21 | 
22 | ## Questions? Comments?
23 | 
24 | Send them along to the [wwwmake Forum/Mailing List](http://groups.google.com/group/wwwmake).
25 | Thanks!
26 | 


--------------------------------------------------------------------------------
/feedfinder/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/feedfinder/version.rb'
 3 | 
 4 | Hoe.spec 'feedfinder' do
 5 | 
 6 |   self.version = FeedFinder::VERSION
 7 | 
 8 |   self.summary = "feedfinder - web feed finder and discovery (RSS, Atom, JSON Feed, etc.)"
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/feedparser/feedfinder']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'wwwmake@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for gihub formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |     ['textutils', '>=1.0.1'],
22 |   ]
23 | 
24 |   self.licenses = ['Public Domain']
25 | 
26 |   self.spec_extras = {
27 |     required_ruby_version: '>= 1.9.2'
28 |   }
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/feedfinder/lib/feedfinder.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | # core and stdlibs
 5 | 
 6 | 
 7 | # 3rd party gems/libs
 8 | 
 9 | require 'textutils'
10 | 
11 | # our own code
12 | 
13 | require 'feedfinder/version'  # let it always go first
14 | 
15 | 
16 | # say hello
17 | puts FeedFinder.banner     if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
18 | 


--------------------------------------------------------------------------------
/feedfinder/lib/feedfinder/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedFinder
 4 | 
 5 |   MAJOR = 0
 6 |   MINOR = 2
 7 |   PATCH = 0
 8 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
 9 | 
10 |   def self.version
11 |     VERSION
12 |   end
13 | 
14 |   def self.banner
15 |     "feedfinder/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16 |   end
17 | 
18 |   def self.root
19 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20 |   end
21 | 
22 | end # module FeedFinder
23 | 


--------------------------------------------------------------------------------
/feedparser/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | .bundle
 4 | .config
 5 | coverage
 6 | InstalledFiles
 7 | lib/bundler/man
 8 | pkg
 9 | rdoc
10 | spec/reports
11 | test/tmp
12 | test/version_tmp
13 | tmp
14 | 
15 | # YARD artifacts
16 | .yardoc
17 | _yardoc
18 | doc/
19 | 


--------------------------------------------------------------------------------
/feedparser/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ### 2.2.1
2 | ### 0.1.0 / 2013-09-19
3 | 
4 | * Everything is new. First release.
5 | 


--------------------------------------------------------------------------------
/feedparser/Manifest.txt:
--------------------------------------------------------------------------------
 1 | CHANGELOG.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | lib/feedparser.rb
 6 | lib/feedparser/attachment.rb
 7 | lib/feedparser/author.rb
 8 | lib/feedparser/builder/atom.rb
 9 | lib/feedparser/builder/json.rb
10 | lib/feedparser/builder/microformats.rb
11 | lib/feedparser/builder/rss.rb
12 | lib/feedparser/feed.rb
13 | lib/feedparser/generator.rb
14 | lib/feedparser/item.rb
15 | lib/feedparser/parser.rb
16 | lib/feedparser/tag.rb
17 | lib/feedparser/thumbnail.rb
18 | lib/feedparser/version.rb
19 | test/helper.rb
20 | test/media_rss_example.txt
21 | test/test_atom_live.rb
22 | test/test_attachments_live.rb
23 | test/test_dates.rb
24 | test/test_microformats.rb
25 | test/test_rss_live.rb
26 | 


--------------------------------------------------------------------------------
/feedparser/NOTES.md:
--------------------------------------------------------------------------------
 1 | # Notes
 2 | 
 3 | ## Add Attachments Update
 4 | 
 5 | - [ ] add attachments to jsonfeed
 6 | - [ ] add support for multiple attachments / media enclosures in atom
 7 | 
 8 | ## Fix head lookahead (in parse)
 9 | 
10 | ```
11 | @head = @text[0..100].strip     # note: remove leading spaces if present
12 | change to
13 | @text.lstrip[0..100]   ## first strip whitespace (or better use lstrip?) avoids all leading blanks in extreme case
14 | # or
15 | @text.lstrip.[0..100]   ## more clear?
16 | ```
17 | 
18 | 
19 | ## Check SSL Bug?
20 | 
21 | ```
22 | ### returns ssl error e.g.
23 | ## OpenSSL::SSL::SSLError: SSL_connect SYSCALL returned=5 errno=0 
24 | ##     state=SSLv2/v3 read server
25 | def test_googlegroup
26 |   feed = fetch_and_parse_feed( 'https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15' )
27 | 
28 |   assert_equal 'atom', feed.format
29 |   assert_equal 'https://groups.google.com/d/forum/beerdb', feed.url
30 | end
31 | ```
32 | 
33 | 
34 | ## More ToDos
35 | 
36 | - [ ] add published_confirmation (like password_confirmation) for dc:date duplicate if pubDate is (also) present?
37 |       - check if dates are the same ?? issue warning if different??
38 | 
39 | - [ ] add "raw" published_text date string to all formats
40 | 
41 | - [ ] add related_url for atom; use link rel=related
42 | 
43 | - [ ] add published_local, updated_local to atom, rss and json (for feed not just items)
44 | 
45 | - [ ] change .rss2 to simple .rss
46 |    - rss 2.0 is just a "better" compatible version of the 0.9x series (0.90, 0.91, 0.92)
47 | 
48 | - [ ] reorg feeds
49 |    - use new feedburner folder - move all feeds "managed" by feedburner to folder
50 |    - use a new google folder - why? why not?  incl. google forum and blogger feed - why? why not?
51 |    - for all remaining use a misc folder - why? why not??
52 | 
53 | - [ ]  convert all dates to utc e.g. use DateTime#utc - why? why not?
54 |        - example: 2015-01-16 08:33:57 UTC <= rfc822 Fri, 16 Jan 2015 09:33:57 +0100
55 |        - or 2017-05-17 15:02:12 UTC <= iso8601 2017-05-17T08:02:12-07:00
56 |        - and so on
57 | 
58 | - [ ]  check intertwingly.atom feed - uses relative urls - how to make absolute ??
59 |        - feed.url:       /blog/
60 |        - feed.items[0].url:      /blog/2017/04/07/Badges-We-dont-need-no-stinkin-badges
61 | 
62 | 
63 | - [x]  change feed.generator_uri to generator_url  (keep uri as alias)
64 | 
65 | - [ ]   turn generator into a struct (instead of three strings)
66 |         - use generator.name, generator.url, generator.version, etc.
67 |         - add alias for generator.name == generator.title  e.g. name = title
68 | 
69 | 
70 | 
71 | ## Limitations of Stdlib RSS reader
72 | 
73 | ### RSS 2.0
74 | 
75 | Cannot read feed_url link using atom:link type="self" e.g.:
76 | 
77 | ```
78 | <atom:link href="https://www.nostarch.com/feeds/comingsoon.xml?startat=tcpip"
79 |            rel="self"
80 |            type="application/rss+xml" />
81 | 
82 | <atom:link href="https://pragprog.com/feed/global"
83 |            rel="self"
84 |            type="application/rss+xml"/>
85 | 
86 | ```
87 | 
88 | see books/nostarch.rss2 and others as examples.
89 | 


--------------------------------------------------------------------------------
/feedparser/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/feedparser/version.rb'
 3 | 
 4 | Hoe.spec 'feedparser' do
 5 | 
 6 |   self.version = FeedParser::VERSION
 7 | 
 8 |   self.summary = 'feedparser - web feed parser and normalizer (RSS, Atom, JSON Feed, HTML h-entry, etc.)'
 9 |   self.description = summary
10 | 
11 |   self.urls    = { home: 'https://github.com/feedparser/feedparser' }
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'gerald.bauer@gmail.com'
15 | 
16 |   # switch extension to .markdown for gihub formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'CHANGELOG.md'
19 | 
20 |   self.extra_deps = [
21 |     ['logutils', '>=0.6.1'],
22 |     ['textutils', '>=1.0.0'],
23 |     ## ['oga', '>=3.2.0'],   note: oga is a "soft" dependency
24 |   ]
25 | 
26 |   ###  todo: add fetcher dep for testing (e.g. development only)
27 | 
28 |   self.licenses = ['Public Domain']
29 | 
30 |   self.spec_extras = {
31 |    required_ruby_version: '>= 2.2.2'
32 |   }
33 | 
34 | end
35 | 


--------------------------------------------------------------------------------
/feedparser/attic/atom_v03.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | ###
 5 | # hack:
 6 | ##  try to patch/convert old obsolete atom v0.3 to v1(-ish)
 7 | ##
 8 | ##  in <feed> convert
 9 | ##   version="0.3"                     =>  removed/dropped! - use ns for version
10 | ##   xmlns="http://purl.org/atom/ns#"  => xmlns="http://www.w3.org/2005/Atom"
11 | ##
12 | ##  <modified>2014-12-31T15:33:00Z</modified>  => <updated>
13 | ##  <issued>2014-12-31T13:02:07Z</issued>  => <published>
14 | ##
15 | ##
16 | ## more changes:
17 | ##   author url   => author uri
18 | ##   generator @url => generator @uri
19 | ##   tagline    => subtitle
20 | ##   copyright  => rights
21 | ##  <created>2014-12-31T13:02:07Z</created>  =>  removed/dropped!
22 | ##
23 | ##  todo/fix:  fix/convert content @type @mode - why?? why not??
24 | ##
25 | ##  content @mode => removed/dropped!
26 | ##  @type=text/plain @mode=escaped     => @type=text
27 | ##  @type=text/html  @mode=escaped     => @type=html
28 | 
29 | 
30 | ## see also
31 | ##  - rakaz.nl/2005/07/moving-from-atom-03-to-10.html
32 | 
33 | 
34 | module FeedParser
35 | 
## Detects old atom v0.3 feeds and patches them up to look like atom v1(-ish).
class AtomV03Helper

  include LogUtils::Logging

  ## matches a <feed> start tag that carries version="0.3" (or version='0.3')
  ##  as any of its attributes - not just as the very first one
  FEED_V03_REGEX = /<feed\b[^>]*\sversion=(["'])0\.3\1/

  ## Returns true if the xml text looks like an atom v0.3 feed; false otherwise.
  def match?( xml )
    ## Note: =~ returns nil if there is NO match (and the match offset otherwise);
    ##   convert to boolean e.g. always return true|false
    !(xml =~ FEED_V03_REGEX).nil?
  end

  ## Returns a copy of the xml text with
  ##  - the version attribute dropped from the <feed> start tag
  ##  - the old atom namespace swapped for the atom v1 namespace
  ##  - <modified> renamed to <updated> and <issued> to <published>
  def convert( xml )
    xml = xml.sub( /<feed[^>]+>/ ) do |m|
      ## Note: m passed in is just a string w/ the match (NOT a match data object!)
      el = m.sub( /version=(["'])0\.3\1/, '' )
      el.sub( /xmlns=(["'])http:\/\/purl\.org\/atom\/ns#\1/, 'xmlns="http://www.w3.org/2005/Atom"' )
    end

    ## rename opening and closing tags in one go (the optional slash gets captured)
    xml = xml.gsub( /<(\/?)modified>/, '<\1updated>' )
    xml = xml.gsub( /<(\/?)issued>/,   '<\1published>' )
    xml
  end

end # class AtomV03Helper
64 | 
65 | end # module FeedParser
66 | 
67 | 


--------------------------------------------------------------------------------
/feedparser/attic/feed.rb:
--------------------------------------------------------------------------------
 1 | module FeedParser
 2 | 
 3 | class Feed
 4 |   ### attr_accessor :object  # not use for now
 5 | 
 6 |   attr_accessor :title_type  # e.g. text|html|html-escaped  (optional) -use - why?? why not??
 7 |   attr_accessor :summary_type   # e.g. text|html|html-escaped
 8 | 
 9 |   def title2?()  @title2.nil? == false;  end
10 |   attr_accessor :title2         # e.g. subtitle (atom)
11 |   attr_accessor :title2_type    # e.g. text|html|html-escaped
12 | 
13 |   def built?()  @built.nil? == false;  end
14 |   attr_accessor :built
15 | 
16 | 
17 | 
18 |   attr_accessor :generator_version  # e.g. @version (atom)
19 |   attr_accessor :generator_url      # e.g. @uri     (atom)
20 | 
21 |   ## note: generator_uri is an alias for generator_url
22 |   alias :generator_uri  :generator_url
23 |   alias :generator_uri= :generator_url=
24 | 
25 | 
26 | 
27 | 
28 | end  # class Feed
29 | 
30 | end # module FeedParser
31 | 


--------------------------------------------------------------------------------
/feedparser/attic/item.rb:
--------------------------------------------------------------------------------
 1 | module FeedParser
 2 | 
 3 | class Item
 4 | 
 5 |   ## attr_accessor :object   # not used for now -- orginal object (e.g RSS item or ATOM entry etc.)
 6 | 
 7 |   attr_accessor :title_type    # optional for now (text|html|html-escaped) - not yet set
 8 | 
 9 |   attr_accessor   :summary_type  # optional for now (text|html|html-escaped) - not yet set
10 | 
11 |   attr_accessor :url      # todo: rename to link (use alias) ??
12 | 
13 | ## todo: add summary (alias description)  ???
14 | 
15 | 
16 | end  # class Item
17 | 
18 | end # module FeedParser
19 | 
20 | 


--------------------------------------------------------------------------------
/feedparser/attic/test_atom_from_file.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | class TestAtomFromFile < MiniTest::Test
 3 | 
 4 |   def test_googlegroup
 5 |     feed = parse_feed_from_file( 'googlegroups.atom' )
 6 | 
 7 |     assert_equal 'atom', feed.format
 8 |     assert_equal 'Google Groups', feed.generator
 9 |     assert_equal 'https://groups.google.com/d/forum/beerdb', feed.url
10 |   end
11 | 
12 |   def test_googlegroup2
13 |     feed = parse_feed_from_file( 'googlegroups2.atom' )
14 | 
15 |     assert_equal 'atom', feed.format
16 |     assert_equal 'Google Groups (w/ leading n trailing newlines stripped)', feed.generator
17 |     assert_equal 'https://groups.google.com/d/forum/beerdb', feed.url
18 |   end
19 | 
20 | end
21 | 
22 | 


--------------------------------------------------------------------------------
/feedparser/attic/test_atom_v03.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | class TestAtomV03 < MiniTest::Test
 4 | 
 5 |   def test_match
 6 |     xmlv1   = read_feed_from_file( 'googlegroups.atom' )
 7 |     xmlv03  = read_feed_from_file( 'quirksblog.atom.v03' )
 8 | 
 9 |     atomv03helper = FeedUtils::AtomV03Helper.new
10 | 
11 |     assert_equal false, atomv03helper.match?( xmlv1 )
12 |     assert_equal true,  atomv03helper.match?( xmlv03 )
13 | 
14 |     xmlv03up = atomv03helper.convert( xmlv03 )
15 |     assert_equal false, atomv03helper.match?( xmlv03up )
16 | 
17 |     pp xmlv03up[0..1000]
18 |   end
19 | 
20 |   def test_parse
21 |     feed  = parse_feed_from_file( 'quirksblog.atom.v03' )
22 | 
23 |     pp feed.updated
24 |     assert_equal '2014-12-31T15:33:00+00:00', feed.updated.to_s
25 | 
26 |     pp feed.items[0].updated
27 |     assert_equal '2014-12-31T15:33:00+00:00', feed.items[0].updated.to_s
28 | 
29 |     pp feed.items[1].updated
30 |     assert_equal '2014-11-26T12:11:25+00:00', feed.items[1].updated.to_s
31 |   end
32 | 
33 | end
34 | 


--------------------------------------------------------------------------------
/feedparser/feed-models.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rubycocos/feedparser/c541894604acf8a8d09fa9ba10a1954fd2f6876e/feedparser/feed-models.png


--------------------------------------------------------------------------------
/feedparser/lib/feedparser.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | # core and stdlibs
 5 | 
 6 | require 'rss'
 7 | require 'pp'
 8 | require 'time'    # note: ruby has a builtin core time class and a stdlib time class pack; require stdlib extensions
 9 | require 'date'    # note: ruby has a builtin core date class and a stdlib date class pack; require stdlib extensions
10 | require 'json'
11 | 
12 | 
13 | # 3rd party gems/libs
14 | 
15 | require 'logutils'
16 | require 'textutils'
17 | 
18 | 
19 | # our own code
20 | 
21 | require 'feedparser/version'  # let it always go first
22 | 
23 | require 'feedparser/builder/atom'
24 | require 'feedparser/builder/rss'
25 | require 'feedparser/builder/json'
26 | require 'feedparser/builder/microformats'
27 | 
28 | 
29 | require 'feedparser/feed'
30 | require 'feedparser/item'
31 | require 'feedparser/author'
32 | require 'feedparser/tag'
33 | require 'feedparser/attachment'
34 | require 'feedparser/thumbnail'
35 | require 'feedparser/generator'
36 | require 'feedparser/parser'
37 | 
38 | 
39 | 
40 | # say hello
41 | puts FeedParser.banner     if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
42 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/attachment.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedParser
 4 | 
 5 | class Attachment   ## also known as Enclosure
 6 | 
 7 |   attr_accessor :url
 8 |   ## note: uri is an alias for url
 9 |   alias :uri  :url       ## add atom alias for uri - why? why not?
10 |   alias :uri= :url=
11 | 
12 |   attr_accessor :length
13 |   attr_accessor :type
14 | 
15 |   # Elements from the media namespace attachment
16 |   attr_accessor :title
17 |   attr_accessor :thumbnail
18 |   attr_accessor :description
19 |   attr_accessor :community
20 | 
21 | end  # class Attachment
22 | 
23 | end # module FeedParser
24 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/author.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedParser
 4 | 
 5 | class Author
 6 | 
 7 |   attr_accessor :name
 8 |   attr_accessor :url
 9 |   ## note: uri is an alias for url
10 |   alias :uri  :url       ## add atom alias for uri - why? why not?
11 |   alias :uri= :url=
12 | 
13 |   def email?()   @email.nil? == false;  end
14 |   attr_accessor :email
15 | 
16 |   def avatar?()  @avatar.nil? == false;  end
17 |   attr_accessor :avatar  # todo/check: use avatar_url ?? used by json feed -check if always a url
18 | 
19 | 
20 |   ## todo: add role - why? why not?
21 |   ##   e.g. add contributor (atom)
22 |   ##          or managingEditor (rss) or webMaster (rss) - why? why not??
23 | 
24 |   attr_accessor :text    # note: holds "unparsed" text (content) line form dc:creator or rss:author
25 |   alias :line :text     # line|text  (add str??  too)
26 | 
27 |   def to_s
28 |     ## note: to_s  - allows to use just author in templates
29 |     ##    will by default return name if present or as fallback "unparsed" text line
30 |      if @name    ## not blank
31 |        @name
32 |      else
33 |        @text
34 |      end
35 |   end
36 | 
37 | end  # class Author
38 | 
39 | end # module FeedParser
40 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/builder/json.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module FeedParser
  4 | 
  5 | class JsonFeedBuilder
  6 | 
  7 |   include LogUtils::Logging
  8 | 
  9 | 
 10 |   def self.build( hash )
 11 |     feed = self.new( hash )
 12 |     feed.to_feed
 13 |   end
 14 | 
 15 |   def initialize( hash )
 16 |     @feed = build_feed( hash )
 17 |   end
 18 | 
 19 |   def to_feed
 20 |     @feed
 21 |   end
 22 | 
 23 | 
 24 | 
 25 |   def build_feed( h )
 26 |     feed = Feed.new
 27 |     feed.format = 'json'
 28 | 
 29 |     feed.title    = h['title']
 30 |     feed.url      = h['home_page_url']
 31 |     feed.feed_url = h['feed_url']
 32 |     feed.summary  = h['description']
 33 | 
 34 | 
 35 |     if h['author']
 36 |       feed.authors << build_author( h['author'] )
 37 |     end
 38 | 
 39 | 
 40 |     h['items'].each do |hash_item|
 41 |       feed.items << build_item( hash_item )
 42 |     end
 43 | 
 44 |     feed # return new feed
 45 |   end # method build_feed_from_json
 46 | 
 47 | 
 48 |   def build_author( h )
 49 |     author = Author.new
 50 | 
 51 |     author.name     = h['name']
 52 |     author.url      = h['url']
 53 |     author.avatar   = h['avatar']
 54 | 
 55 |     author
 56 |   end
 57 | 
 58 | 
 59 | 
 60 |   def build_item( h )
 61 |     item = Item.new   # Item.new
 62 | 
 63 |     item.guid         = h['id']
 64 |     item.title        = h['title']
 65 |     item.url          = h['url']
 66 |     item.external_url = h['external_url']
 67 | 
 68 |     ## convert date if present (from string to date type)
 69 |     date_published_str = h['date_published']
 70 |     if date_published_str
 71 |       item.published_local  = DateTime.iso8601( date_published_str )
 72 |       item.published        = item.published_local.utc
 73 |     end
 74 | 
 75 |     date_modified_str = h['date_modified']
 76 |     if date_modified_str
 77 |       item.updated_local  = DateTime.iso8601( date_modified_str )
 78 |       item.updated        = item.updated_local.utc
 79 |     end
 80 | 
 81 | 
 82 |     item.content_html = h['content_html']
 83 |     item.content_text = h['content_text']
 84 |     item.summary      = h['summary']
 85 | 
 86 |     if h['author']
 87 |       item.authors << build_author( h['author'] )
 88 |     end
 89 | 
 90 |     if h['tags']
 91 |       h['tags'].each do |json_tag|
 92 |         item.tags << build_tag( json_tag )
 93 |       end
 94 |     end
 95 | 
 96 |     item
 97 |   end # method build_item
 98 | 
 99 | 
100 |   def build_tag( json_tag )
101 |     ## pp rss_cat
102 |     tag = Tag.new
103 | 
104 |     tag.name = json_tag
105 | 
106 |     tag
107 |   end  # build_tag
108 | 
109 | 
110 | end # JsonFeedBuilder
111 | end # FeedParser
112 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/builder/microformats.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module FeedParser
  4 | 
  5 | 
  6 | class HyFeedBuilder
  7 | 
  8 |   include LogUtils::Logging
  9 | 
 10 | 
 11 |   def self.build( hash )
 12 |     feed = self.new( hash )
 13 |     feed.to_feed
 14 |   end
 15 | 
 16 |   def initialize( hash )
 17 |     @feed = build_feed( hash )
 18 |   end
 19 | 
 20 |   def to_feed
 21 |     @feed
 22 |   end
 23 | 
 24 | 
 25 |   def build_feed( h )
 26 | 
 27 |     b = HyBuilder.new( h )     ## convert hash to structs
 28 | 
 29 |     ##  use first feed - more really possible?
 30 |     ##   fix/todo: handle no feed too!!!
 31 |     hy = b.feeds[0]
 32 | 
 33 |     ## pp hy
 34 | 
 35 |     feed = Feed.new
 36 |     feed.format = 'html'
 37 | 
 38 |     ### todo: add
 39 |     ## - feed.title
 40 |     ## - feed.url
 41 |     ## - feed.feed_url
 42 |     ## - feed.summary
 43 |     ## - feed.authors
 44 |     ## etc.
 45 | 
 46 |     hy.entries.each do |entry|
 47 |       feed.items << build_item( entry )
 48 |     end
 49 | 
 50 |     feed # return new feed
 51 |   end # method build_feed
 52 | 
 53 | 
 54 |   def build_author( hy )
 55 |     author = Author.new
 56 | 
 57 |     author.name     = hy.name
 58 | 
 59 |     ## todo - add:
 60 |     ## author.url
 61 | 
 62 |     author
 63 |   end
 64 | 
 65 | 
 66 | 
 67 |   def build_item( hy )
 68 |     item = Item.new   # Item.new
 69 | 
 70 |     item.title           = hy.name
 71 |     item.url             = hy.url
 72 |     item.published_local = hy.published_local
 73 |     item.published       = hy.published
 74 | 
 75 |     item.content_html    = hy.content_html
 76 |     item.content_text    = hy.content_text
 77 |     item.summary         = hy.summary
 78 | 
 79 |     ##  check: how to add an id - auto-generate - why? why not??
 80 |     ## item.id         = h['id']
 81 | 
 82 |     hy.authors.each do |author|
 83 |       item.authors << build_author( author )
 84 |     end
 85 | 
 86 |     item
 87 |   end # method build_item
 88 | 
 89 | end # class HyFeedBuilder
 90 | 
 91 | 
 92 | 
 93 | class HyFeed
 94 |   attr_accessor :entries
 95 | 
 96 |   def initialize
 97 |      @entries = []
 98 |   end
 99 | end # class HyFeed
100 | 
101 | 
## Simple struct-like holder for a (microformats) h-entry.
class HyEntry
   attr_accessor :name, :content, :content_text, :summary

   attr_accessor :published          # utc time
   attr_accessor :published_local    # local time (with timezone/offset)
   attr_accessor :url

   attr_accessor :authors      # note: allow multiple authors

   # note: title is an alias for name
   alias_method :title,  :name
   alias_method :title=, :name=

   # note: content_html is an alias for content
   alias_method :content_html,  :content
   alias_method :content_html=, :content=

   # note: authors is an empty array on startup (not nil)
   def initialize() @authors = []; end

end  ## class HyEntry
127 | 
128 | 
## Simple struct-like holder for a (microformats) author - name and url.
class HyAuthor
   attr_accessor :name, :url
end  ## class HyAuthor
133 | 
134 | 
135 | 
136 | 
## Converts a microformats parse result (as a hash) into HyFeed/HyEntry/HyAuthor structs.
##
##   - every top-level h-feed item becomes a HyFeed (with its h-entry children as entries)
##   - all "loose" top-level h-entry items get wrapped in one extra HyFeed (added last)
##   - all other item types get skipped (for now)
##
##  note: missing keys (items, children, type, properties) are treated as empty
class HyBuilder

   attr_reader :feeds     # array of HyFeed structs built on startup

   def initialize( hash )
     @h     = hash
     @feeds = []
     build
   end

   def build

    entries = []
    ( @h['items'] || [] ).each do |item_hash|
      types = item_hash['type'] || []
      if types.include?( 'h-feed' )
        @feeds << build_feed( item_hash )
      elsif types.include?( 'h-entry' )
        entries << build_entry( item_hash )
      else
        ## unknown type; skip for now
      end
    end

    ## wrap all "loose" entries in a "dummy" h-entry feed
    if entries.any?
       feed = HyFeed.new
       feed.entries = entries
       @feeds << feed
    end

  end # method build

  ## Build a HyFeed from an h-feed item hash; collects all h-entry children.
  def build_feed( h )
     feed = HyFeed.new

     ( h['children'] || [] ).each do |item_hash|
      types = item_hash['type'] || []
      if types.include?( 'h-entry' )
        feed.entries << build_entry( item_hash )
      else
        ## unknown type; skip for now
      end
     end

     feed
  end  ## method build_feed


  ## Build a HyEntry from an h-entry item hash (reads the 'properties' hash).
  def build_entry( h )
     entry = HyEntry.new

     props = h['properties'] || {}

     if props['name']
       entry.name = props['name'].join( '  ')     # check an example with more entries (how to join??)
     end

     if props['summary']
       entry.summary = props['summary'].join( '  ' )
     end

     if props['content']
       ## add up all value attribs in content
       entry.content_text =  props['content'].map { |part| content_part( part, :value ) }.join( '  ' ).strip
       ## add up all html attribs in content; plus strip leading n trailing whitespaces
       entry.content =  props['content'].map { |part| content_part( part, :html ) }.join( '  ' ).strip
     end


     # get first field in array  -- check if really ever possible more than one?
     url_str = props.fetch( 'url', [] )[0]
     if url_str
       entry.url = url_str
     end

     # get first field in array  -- check if really ever possible more than one? what does it mean (many dates)???
     ##  todo: check if datetime is always utc (or local possible?)
     published_str = props.fetch( 'published', [] )[0]
     if published_str
       ## entry.published = DateTime.iso8601( published_str )
       entry.published_local = DateTime.parse( published_str )
       entry.published       = to_utc( entry.published_local )
     end

     ## check for authors
     if props['author']
       props['author'].each do |author_hash|
         entry.authors << build_author( author_hash )
       end
     end

     entry
  end  # method build_entry

  ## Build a HyAuthor from an author property value.
  ##   the value is either a hash (embedded h-card; use its 'value')
  ##   or a plain string (used 1:1 as the name)
  def build_author( h )
    author = HyAuthor.new

    author.name = if h.is_a?( Hash )
                    h['value'] || h[:value]
                  else
                    h.to_s
                  end

    ## todo/fix: -- note: for now skip possible embedded h-card
    author
  end  # method build_author


  private

  ## Read one attribute (:value or :html) from a content part.
  ##   accepts symbol or string keys; a plain (non-hash) part is used as is (converted to string)
  def content_part( part, key )
    return part.to_s  unless part.is_a?( Hash )

    part[ key ] || part[ key.to_s ]
  end

  ## Convert to utc - note: stdlib DateTime has no #utc method (only #new_offset);
  ##   use #utc if available and fall back to #new_offset( 0 ) otherwise
  def to_utc( datetime )
    datetime.respond_to?( :utc ) ? datetime.utc : datetime.new_offset( 0 )
  end

end  # class HyBuilder
261 | 
262 | 
263 | 
264 | end # module FeedParser
265 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/feed.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedParser
 4 | 
 5 | class Feed
 6 | 
 7 |   attr_accessor :format   # e.g. atom|rss 2.0|json etc.
 8 |   attr_accessor :title
 9 |   attr_accessor :url        ## todo - add alias site_url/home_page_url/page_url - why? why not??
10 |   attr_accessor :feed_url
11 | 
12 | 
13 |   attr_accessor :items
14 | 
15 |   attr_accessor :authors
16 |   def authors?()  @authors && @authors.size > 0;  end
17 |   ## note: author? is an alias for authors?
18 |   alias :author? :authors?
19 | 
20 |   ## add author  shortcut e.g. equals authors[0] - for now only read only
21 |   ##   fix: also add author=  why? why not???
22 |   def author() @authors[0]; end
23 | 
24 | 
25 |   attr_accessor :tags
26 |   def tags?()  @tags && @tags.size > 0;  end
27 | 
28 |   ## add alias category for tags (remove - why? why not?)
29 |   alias :categories :tags
30 | 
31 | 
32 |   def summary?()  @summary.nil? == false;  end
33 |   attr_accessor :summary        # e.g. description (rss)|subtitle (atom)
34 | 
35 |   ## add description as alias for summary (remove - why? why not?)
36 |   alias :description  :summary
37 |   alias :description= :summary=
38 |   alias :description? :summary?
39 | 
40 | 
41 |   ##
42 |   ##  todo/check/fix:
43 |   ##     use a extra field for atom subtitle
44 |   ##      - subtitle not the same as summary - why? why not?
45 |   ##      -  assume summary == description == abstract but
46 |   ##            keep subtitle separate e.g. assume subtitle is just a (simple) single line
47 |   ##
48 |   ##  for now alias summary to subtitle
49 |   alias :subtitle  :summary
50 |   alias :subtitle= :summary=
51 |   alias :subtitle? :summary?
52 | 
53 | 
54 | 
55 |   def updated?()  @updated.nil? == false;  end
56 |   attr_accessor :updated        # e.g. lastBuildDate (rss)|updated (atom)   -- always (converted) to utc
57 |   attr_accessor :updated_local  # "unparsed" local datetime as in feed (NOT converted to utc)
58 | 
59 |   attr_accessor :updated_text    #  string version of date
60 |   alias :updated_line :updated_text   # text|line - convention for "unparsed" 1:1 from feed; add str(too ??)
61 | 
62 |   def published?()  @published.nil? == false;  end
63 |   attr_accessor :published         # e.g. pubDate (rss)\n/a (atom)  -- note: published is basically an alias for created
64 |   attr_accessor :published_local   # "unparsed" local datetime as in feed (NOT converted to utc)
65 | 
66 |   attr_accessor :published_text    #  string version of date
67 |   alias :published_line :published_text   # text|line - convention for "unparsed" 1:1 from feed; add str(too ??)
68 | 
69 | 
70 |   attr_accessor :generator
71 | 
72 | 
73 |   ## fix:
74 |   #  add pretty printer/inspect (exclude object)
75 | 
76 | 
77 |   def initialize
78 |     ## note: make items, authors, tags empty arrays on startup (e.g. not nil)
79 |     @items   = []
80 |     @authors = []
81 |     @tags    = []
82 | 
83 |     @generator = Generator.new
84 |   end
85 | 
86 | end  # class Feed
87 | 
88 | end # module FeedParser
89 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/generator.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedParser
 4 | 
 5 | class Generator
 6 | 
 7 |   attr_accessor :name
 8 |   ## note: title is an alias for name
 9 |   alias :title  :name
10 |   alias :title= :name=
11 | 
12 |   attr_accessor :version
13 | 
14 |   attr_accessor :url
15 |   ## note: uri is an alias for url
16 |   alias :uri  :url       ## add atom alias for uri - why? why not?
17 |   alias :uri= :url=
18 | 
19 | 
20 |   attr_accessor :text  # note: holds "unparsed" text (content) line form rss:generator
21 |   alias :line :text    # line|text (add str?? too)
22 | 
23 | 
24 |   def to_s
25 |     ## note: to_s  - allows to use just generator in templates
26 |     ##    will by default return name if present or as fallback "unparsed" text line
27 |      if @name    ## not blank
28 |        @name
29 |      else
30 |        @text
31 |      end
32 |   end
33 | 
34 | end  # class Generator
35 | 
36 | end # module FeedParser
37 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/item.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module FeedParser
  4 | 
  5 | class Item
  6 | 
  7 |   attr_accessor :title
  8 |   attr_accessor :url
  9 |   attr_accessor :external_url
 10 | 
 11 |   # note: related_url is an alias for external_url
 12 |   alias :related_url  :external_url     ## link rel=related used in atom
 13 |   alias :related_url= :external_url=
 14 | 
 15 | 
 16 |   ## note: only content/content_html should use html;
 17 |   ##  all others (e.g. title/summary/content_text) shoud be plain (vanilla) text
 18 | 
 19 | 
 20 |   def content?()  @content.nil? == false;  end
 21 |   attr_accessor  :content
 22 | 
 23 |   ## note: content_html is an alias for content
 24 |   ##   will hold type html/xhtml/html-escaped  - check if always converted to string by parser ??
 25 |   alias :content_html  :content
 26 |   alias :content_html= :content=
 27 |   alias :content_html? :content?
 28 | 
 29 | 
 30 |   def content_text?()  @content_text.nil? == false;  end
 31 |   attr_accessor  :content_text
 32 | 
 33 | 
 34 | 
 35 |   def summary?()  @summary.nil? == false;  end
 36 |   attr_accessor   :summary
 37 | 
 38 |   ## add description as alias for summary (remove - why? why not?)
 39 |   alias :description  :summary
 40 |   alias :description= :summary=
 41 |   alias :description? :summary?
 42 | 
 43 | 
 44 | 
 45 |   def updated?()  @updated.nil? == false;  end
 46 |   attr_accessor :updated   # pubDate (RSS)|updated (Atom)
 47 |   attr_accessor :updated_local  # "unparsed" local datetime as in feed (NOT converted to utc)
 48 | 
 49 |   attr_accessor :updated_text    #  string version of date
 50 |   alias :updated_line :updated_text   # text|line - convention for "unparsed" 1:1 from feed; add str(too ??)
 51 | 
 52 | 
 53 |   def published?()  @published.nil? == false;  end
 54 |   attr_accessor :published  # note: published is basically an alias for created
 55 |   attr_accessor :published_local   # "unparsed" local datetime as in feed (NOT converted to utc)
 56 | 
 57 |   attr_accessor :published_text    #  string version of date
 58 |   alias :published_line :published_text   # text|line - convention for "unparsed" 1:1 from feed; add str(too ??)
 59 | 
 60 | 
 61 |   attr_accessor :id
 62 | 
 63 |   ## note: guid is an alias for id
 64 |   alias :guid  :id
 65 |   alias :guid= :id=
 66 | 
 67 |   attr_accessor :authors
 68 |   ## add author  shortcut e.g. equals authors[0] - for now only read only
 69 |   ##   fix: also add author=  why? why not???
 70 |   def authors?()  @authors && @authors.size > 0;  end
 71 |   ## note: author? is an alias for authors?
 72 |   alias :author? :authors?
 73 | 
 74 |   ## add author  shortcut e.g. equals authors[0] - for now only read only
 75 |   ##   fix: also add author=  why? why not???
 76 |   def author() @authors[0]; end
 77 | 
 78 | 
 79 |   attr_accessor :tags
 80 |   def tags?()  @tags && @tags.size > 0;  end
 81 | 
 82 |   alias :categories :tags    # for now allow categories alias for tags - remove (why? why not?)
 83 | 
 84 | 
 85 |   # add attachments/media enclosures (url, length and type)
 86 |   #  note: lets support more than one (it's an array)
 87 |   attr_accessor :attachments
 88 | 
 89 |   def attachment()    @attachments[0]; end
 90 |   def attachments?()  @attachments && @attachments.size > 0;  end
 91 |   alias :attachment? :attachments?
 92 | 
 93 |   alias :enclosures  :attachments
 94 |   alias :enclosure   :attachment
 95 |   alias :enclosures? :attachments?
 96 |   alias :enclosure?  :attachments?
 97 | 
 98 | 
 99 |   def initialize
100 |     ## note: make authors, tags empty arrays on startup (e.g. not nil)
101 |     @authors     = []
102 |     @tags        = []
103 |     @attachments = []
104 |   end
105 | 
106 | end  # class Item
107 | 
108 | end # module FeedParser
109 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/parser.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module FeedParser
  4 | 
  5 | 
  6 | class Parser
  7 | 
  8 |   include LogUtils::Logging
  9 | 
 10 | 
 11 |   ### convenience class/factory method
 12 |   def self.parse( text, opts={} )
 13 |     self.new( text ).parse
 14 |   end
 15 | 
 16 |   ### Note: lets keep/use same API as RSS::Parser for now
 17 |   def initialize( text )
 18 |     @text = text
 19 |     @head = @text[0..100].strip     # note: remove leading spaces if present
 20 |   end
 21 | 
 22 | 
 23 | 
 24 |   #### note:
 25 |   # make format checks callable from outside (that is, use builtin helper methods)
 26 | 
 27 |   def is_xml?
 28 |     ## check if starts with knownn xml prologs
 29 |     @head.start_with?( '<?xml' )  ||
 30 |     @head.start_with?( '<feed' ) ||
 31 |     @head.start_with?( '<rss' )  ||
 32 |     @head.start_with?( '<rdf' )       # note - add support for rss 1.0 (aka rdf)
 33 |   end
 34 |   alias_method :xml?, :is_xml?
 35 | 
 36 |   JSONFEED_VERSION_RE = %r{"version":\s*"https://jsonfeed.org/version/1"}
 37 |   def is_json?
 38 |     ## check if starts with { for json object/hash
 39 |     ##    or if includes jsonfeed prolog
 40 |     @head.start_with?( '{' ) ||
 41 |     @head =~ JSONFEED_VERSION_RE
 42 |   end
 43 |   alias_method :json?, :is_json?
 44 | 
 45 |   def is_microformats?
 46 |     #  for now check for microformats v2 (e.g. h-entry, h-feed)
 47 |     #    check for v1 too - why? why not? (e.g. hentry, hatom ??)
 48 |     @text.include?( 'h-entry' ) ||
 49 |     @text.include?( 'h-feed' )
 50 |   end
 51 |   alias_method :microformats?, :is_microformats?
 52 | 
 53 | 
 54 | 
 55 |   def parse
 56 |     if is_xml?
 57 |        parse_xml
 58 |     elsif is_json?
 59 |        parse_json
 60 |     ##  note: reading/parsing microformat is for now optional
 61 |     ##    microformats gem requires nokogiri
 62 |     ##       nokogiri (uses libxml c-extensions) makes it hard to install (sometime)
 63 |     ##       thus, if you want to use it, please opt-in to keep the install "light"
 64 |     elsif defined?( Microformats ) && is_microformats?
 65 |        parse_microformats
 66 |     else  ## fallback - assume xml for now
 67 |        parse_xml
 68 |     end
 69 |   end # method parse
 70 | 
 71 | 
 72 |   def parse_microformats
 73 |     logger.debug "using microformats/#{Microformats::VERSION}"
 74 | 
 75 |     logger.debug "Parsing feed in html (w/ microformats)..."
 76 | 
 77 |     collection = Microformats.parse( @text )
 78 |     collection_hash = collection.to_hash
 79 | 
 80 |     feed = HyFeedBuilder.build( collection_hash )
 81 | 
 82 |     logger.debug "== #{feed.format} / #{feed.title} =="
 83 |     feed # return new (normalized) feed
 84 |   end # method parse_microformats
 85 | 
 86 | 
 87 |   def parse_json
 88 |     logger.debug "using stdlib json/#{JSON::VERSION}"
 89 | 
 90 |     logger.debug "Parsing feed in json..."
 91 |     feed_hash = JSON.parse( @text )
 92 | 
 93 |     feed = JsonFeedBuilder.build( feed_hash )
 94 | 
 95 |     logger.debug "== #{feed.format} / #{feed.title} =="
 96 |     feed # return new (normalized) feed
 97 |   end # method parse_json
 98 | 
 99 | 
100 |   def parse_xml
101 |     logger.debug "using stdlib rss/#{RSS::VERSION}"
102 | 
103 |     parser = RSS::Parser.new( @text )
104 | 
105 |     parser.do_validate            = false
106 |     parser.ignore_unknown_element = true
107 | 
108 |     logger.debug "Parsing feed in xml..."
109 |     feed_wild = parser.parse  # not yet normalized
110 | 
111 |     logger.debug "  feed.class=#{feed_wild.class.name}"
112 | 
113 |     if feed_wild.is_a?( RSS::Atom::Feed )
114 |       feed = AtomFeedBuilder.build( feed_wild, @text )
115 |     else  # -- assume RSS::Rss::Feed
116 |       feed = RssFeedBuilder.build( feed_wild, @text )
117 |     end
118 | 
119 |     logger.debug "== #{feed.format} / #{feed.title} =="
120 |     feed # return new (normalized) feed
121 |   end  # method  parse_xml
122 | 
123 | end  # class Parser
124 | 
125 | 
126 | end # module FeedParser
127 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/tag.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedParser
 4 | 
 5 | class Tag
 6 | 
 7 |   attr_accessor :name
 8 |   ## note: title n term are aliases for name
 9 |   alias :title  :name
10 |   alias :title= :name=
11 | 
12 |   alias :term   :name
13 |   alias :term=  :name=
14 | 
15 | 
16 |   attr_accessor :scheme    ## use scheme_url -why? why not? is it always a url/uri??
17 |   ## note: domain (rss) is an alias for scheme (atom)
18 |   alias :domain   :scheme
19 |   alias :domain=  :scheme=
20 | 
21 | end  # class Tag
22 | 
23 | end # module FeedParser
24 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/thumbnail.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedParser
 4 | 
 5 | class Thumbnail
 6 | 
 7 |   attr_accessor :url
 8 |   
 9 |   ## note: uri is an alias for url
10 |   alias :uri  :url       ## add atom alias for uri - why? why not?
11 |   alias :uri= :url=
12 | 
13 |   def width?()   @width.nil? == false;  end
14 |   attr_accessor :width
15 | 
16 |   def height?()  @height.nil? == false;  end
17 |   attr_accessor :height  # todo/check: use avatar_url ?? used by json feed -check if always a url
18 | 
19 | end  # class Thumbnail
20 | 
21 | end # module FeedParser
22 | 


--------------------------------------------------------------------------------
/feedparser/lib/feedparser/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module FeedParser
 4 | 
 5 |   MAJOR = 2
 6 |   MINOR = 2
 7 |   PATCH = 1
 8 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
 9 | 
10 |   def self.version
11 |     VERSION
12 |   end
13 | 
14 | 
15 |   def self.banner
16 |     "feedparser/#{VERSION} (with rss/#{RSS::VERSION} n json/#{JSON::VERSION}) on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
17 |   end
18 | 
19 |   def self.root
20 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
21 |   end
22 | 
23 | end # module FeedParser
24 | 


--------------------------------------------------------------------------------
/feedparser/sandbox/testatom.rb:
--------------------------------------------------------------------------------
  1 | 
  2 | # stdlibs
  3 | require 'rss'
  4 | require 'pp'
  5 | 
  6 | # 3rd party libs/gems
  7 | require 'fetcher'
  8 | 
  9 | ## feed_url = 'http://weblog.rubyonrails.org/feed/atom.xml'   # atom 1.0
 10 | 
 11 | ## feed_url = 'http://www.quirksmode.org/blog/atom.xml'  # atom 0.3 (!)
 12 | 
 13 | feed_url = 'http://intertwingly.net/blog/index.atom'
 14 | 
 15 | 
 16 | xml = Fetcher.read( feed_url )
 17 | 
 18 | feed = RSS::Parser.parse( xml, false, true )
 19 | # 1) false => do NOT validate  (otherwise atom 0.3 fails)
 20 | # 2) true => ignore unknown elements   - use true - why? why not??
 21 | 
 22 | # Note: default is true,true - that is, do validate, and do ignore unknown elements
 23 | 
 24 | 
 25 | ############
 26 | #    format version mappings:
 27 | #  RSS::Atom::Feed   => atom
 28 | 
 29 | 
 30 | ###########
 31 | # Note: RSS::Atom::Feed
 32 | #   - has no feed_version  => assumes always 1.0 for now (no other atom format exists)
 33 | 
 34 | 
 35 | 
 36 | ##################
 37 | # RSS::Rss
 38 | # - see http://www.ruby-doc.org/stdlib-2.0.0/libdoc/rss/rdoc/RSS/Rss.html
 39 | 
 40 | puts "feed.class: #{feed.class.name}"
 41 | 
 42 | 
 43 | ## puts "dump feed:"
 44 | ## pp feed
 45 | 
 46 | # puts "dump feed.channel:"
 47 | # puts feed.channel.inspect
 48 | 
 49 | puts "dump feed.title (#{feed.title.class.name}):"
 50 | ## pp feed.title
 51 | 
 52 | puts "dump feed.id (#{feed.id.class.name}):"
 53 | ## pp feed.id
 54 | 
 55 | puts "dump feed.updated (#{feed.updated.class.name}):"
 56 | ## pp feed.updated
 57 | 
 58 | =begin
 59 | @link=
 60 |   [#<RSS::Atom::Feed::Link:0x8c90c7c
 61 |     @base=nil,
 62 |     @converter=nil,
 63 |     @do_validate=true,
 64 |     @href="http://weblog.rubyonrails.org/feed/",
 65 |     @hreflang=nil,
 66 |     @lang=nil,
 67 |     @length=nil,
 68 |     @parent=#<RSS::Atom::Feed:0x8c9c2d4 ...>,
 69 |     @rel="self",
 70 |     @title=nil,
 71 |     @type="application/atom+xml">,
 72 |    #<RSS::Atom::Feed::Link:0x8c8cec4
 73 |     @base=nil,
 74 |     @converter=nil,
 75 |     @do_validate=true,
 76 |     @href="http://weblog.rubyonrails.org/",
 77 |     @hreflang=nil,
 78 |     @lang=nil,
 79 |     @length=nil,
 80 |     @parent=#<RSS::Atom::Feed:0x8c9c2d4 ...>,
 81 |     @rel="alternate",
 82 |     @title=nil,
 83 |     @type="text">],
 84 | =end
 85 | 
 86 | # check links (assume it's any array - always)
 87 | puts "dump feed.link (#{feed.link.class.name}):"
 88 | puts "  link rel=#{feed.link.rel} type=#{feed.link.type} href=#{feed.link.href}"
 89 | 
 90 | ## Note: use links (with s - plural to get back array)
 91 | puts "dump feed.links (#{feed.links.class.name}):"
 92 | 
 93 | feed.links.each_with_index do |link,i|
 94 |   puts "[#{i}] link rel=#{link.rel} type=#{link.type} href=#{link.href}"
 95 | end
 96 | 
 97 | 
 98 | ## todo/check: atom feed can include published element (optionaly)?
 99 | 
100 | if feed.respond_to?( :published )
101 |   puts "dump feed.published (#{feed.published.class.name}):"
102 |   ## pp feed.published
103 | end
104 | 
105 | 
106 | pp feed
107 | 


--------------------------------------------------------------------------------
/feedparser/sandbox/testpp.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | require 'logutils'
 3 | require 'textutils'
 4 | require 'fetcher'
 5 | 
 6 | 
 7 | ## our own code
 8 | 
 9 | require 'feedparser'
10 | 
11 | 
12 | ## LogUtils::Logger.root.level = :debug
13 | 
## fetch a json feed, parse it and pretty print the (normalized) feed; print the title last
feed_url = "http://openfootball.github.io/feed.json"

feed = FeedParser::Parser.parse( Fetcher.read( feed_url ) )

pp feed

puts feed.title
22 | 


--------------------------------------------------------------------------------
/feedparser/sandbox/testrss.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | # stdlibs
 3 | require 'rss'
 4 | require 'pp'
 5 | 
 6 | # 3rd party libs/gems
 7 | require 'fetcher'
 8 | 
 9 | ## feed_url = 'http://feeds.feedburner.com/Rubyflow?format=xml'   # rss 2.0
10 | 
feed_url = 'https://www.nostarch.com/feeds/comingsoon.xml'   # rss 2.0


## download the feed text
xml = Fetcher.read( feed_url )

## parse using the stdlib rss parser; the two flags passed in are:
# 1) false => do NOT validate
# 2) false => do NOT ignore unknown elements   - use true - why? why not??
do_validate            = false
ignore_unknown_element = false

feed = RSS::Parser.parse( xml, do_validate, ignore_unknown_element )
19 | 
20 | 
21 | ############
22 | #   format version mappings:
23 | #  RSS::Rss    #rss_version==2.0   => rss 2.0
24 | #              #rss_version==
25 | 
26 | 
27 | ##################
28 | # RSS::Rss
29 | # - see http://www.ruby-doc.org/stdlib-2.0.0/libdoc/rss/rdoc/RSS/Rss.html
30 | 
## print the class and the versions of the parsed feed
feed_class = feed.class.name
puts "feed.class: #{feed_class}"

rss_version  = feed.rss_version
puts "feed.rss_version: #{rss_version}"
feed_version = feed.feed_version
puts "feed.feed_version: #{feed_version}"

## dump the feed image
puts "feed.image:"
pp feed.image


## dump the complete feed
pp feed
41 | 


--------------------------------------------------------------------------------
/feedparser/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | 
 4 | ## minitest setup
 5 | 
 6 | require 'minitest/autorun'
 7 | 
 8 | require 'logutils'
 9 | require 'textutils'
10 | require 'fetcher'
11 | 
12 | 
13 | ## our own code
14 | require 'feedparser'
15 | 
16 | 
17 | 
18 | LogUtils::Logger.root.level = :debug
19 | 
20 | 
## Test helper - read the feed text from the url and return the parsed feed.
def fetch_and_parse_feed( url )
  FeedParser::Parser.parse( Fetcher.read( url ) )
end
26 | 


--------------------------------------------------------------------------------
/feedparser/test/media_rss_example.txt:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" version="2.0">
 3 |    <channel>
 4 |       <title>Calm Meditation</title>
 5 |       <link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com</link>
 6 |       <language>en-us</language>
 7 |       <pubDate>Mon, 02 Apr 2018 16:19:56 -0700</pubDate>
 8 |       <lastBuildDate>Mon, 02 Apr 2018 16:19:56 -0700</lastBuildDate>
 9 |       <managingEditor>tomjoht@gmail.com (Tom Johnson)</managingEditor>
10 |       <description>Contains short videos capturing still scenes from nature with a music background, intended for calming or meditation purposes. When you're stressed out or upset, watch a few videos. As your mind focuses on the small details, let your worries and frustrations float away. The purpose is not to entertain or to distract, but to help calm, soothe, and surface your inner quiet. The videos contain scenes from the San Tomas Aquinas trail in Santa Clara, California.</description>
11 |       <image>
12 |          <link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com</link>
13 |          <title>Calm Meditation</title>
14 |          <url>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/calmmeditationlogo_small.png</url>
15 |          <description>Contains short videos capturing still scenes from nature with a music background, intended for calming or meditation purposes. When you're stressed out or upset, watch a few videos. As your mind focuses on the small details, let your worries and frustrations float away. The purpose is not to entertain or to distract, but to help calm, soothe, and surface your inner quiet. The videos contain scenes from the San Tomas Aquinas trail in Santa Clara, California.</description>
16 |          <height>114</height>
17 |          <width>114</width>
18 |       </image>
19 |       <atom:link href="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/feed.xml" rel="self" type="application/rss+xml" />
20 |       <item>
21 |          <title>Shade</title>
22 |          <pubDate>Mon, 23 Oct 2017 00:00:00 -0700</pubDate>
23 |          <link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/shade/</link>
24 |          <description>Quiet the mind, and the soul will speak. - Ma Jaya Sati Bhagavati</description>
25 |          <guid isPermaLink="false">http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/shade/</guid>
26 |          <media:category>All</media:category>
27 |          <media:category>Trail</media:category>
28 |          <media:content url="http://d1nixf144dcz0j.cloudfront.net/shade.mp4" language="en-us" fileSize="37000000" duration="120.0" medium="video" isDefault="true">
29 |             <media:title type="plain">Shade</media:title>
30 |             <media:description type="html">Quiet the mind, and the soul will speak. - Ma Jaya Sati Bhagavati</media:description>
31 |             <media:thumbnail url="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/thumbs/shade.jpg" />
32 |             <media:credit role="author" scheme="urn:ebu">Tom Johnson</media:credit>
33 |             <media:copyright url="https://creativecommons.org/licenses/by/4.0/" />
34 |          </media:content>
35 |       </item>
36 |       <item>
37 |          <title>Spectators</title>
38 |          <pubDate>Thu, 12 Oct 2017 00:00:00 -0700</pubDate>
39 |          <link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/spectators/</link>
40 |          <description>"Your worst enemy cannot harm you as much as your own thoughts, unguarded." – Buddha</description>
41 |          <guid isPermaLink="false">http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/spectators/</guid>
42 |          <media:category>All</media:category>
43 |          <media:category>Grass</media:category>
44 |          <media:content url="http://d1nixf144dcz0j.cloudfront.net/spectators.mp4" language="en-us" fileSize="19000000" duration="120.0" medium="video" isDefault="true">
45 |             <media:title type="plain">Spectators</media:title>
46 |             <media:description type="html">"Your worst enemy cannot harm you as much as your own thoughts, unguarded." – Buddha</media:description>
47 |             <media:thumbnail url="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/thumbs/spectators.jpg" />
48 |             <media:credit role="author" scheme="urn:ebu">Tom Johnson</media:credit>
49 |             <media:copyright url="https://creativecommons.org/licenses/by/4.0/" />
50 |          </media:content>
51 |       </item>
52 |    </channel>
53 | </rss>
54 | 


--------------------------------------------------------------------------------
/feedparser/test/test_atom_live.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_atom_live.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | 
 8 | 
 9 | require 'helper'
10 | 
11 | class TestAtomLive < Minitest::Test
12 |   ## all tests below call fetch_and_parse_feed (not defined in this file - presumably in test/helper) and check the parsed feed
13 |   def test_rubyonrails
14 |     feed = fetch_and_parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' )
15 | 
16 |     assert_equal 'atom',                            feed.format
17 |     assert_equal 'https://weblog.rubyonrails.org/', feed.url
18 |     ## note was (2020/1): 'http://weblog.rubyonrails.org/', feed.url
19 |   end
20 | 
21 |   ## expected url has changed twice - the dated notes after the assertion record the previous values
22 |   def test_railstutorial
23 |     feed = fetch_and_parse_feed( 'http://feeds.feedburner.com/railstutorial?format=xml' )
24 | 
25 |     assert_equal 'atom',                            feed.format
26 |     assert_equal 'https://news.learnenough.com/',   feed.url
27 |     ## note was (2020/1):  assert_equal 'http://news.learnenough.com/',   feed.url
28 |     ## note was (2017/5):  assert_equal 'http://news.railstutorial.org/', feed.url
29 |   end
30 | 
31 | 
32 | =begin
33 |   ### returns ssl error e.g.
34 |   ## OpenSSL::SSL::SSLError: SSL_connect SYSCALL returned=5 errno=0 state=SSLv2/v3 read server
35 |   def test_googlegroup
36 |     feed = fetch_and_parse_feed( 'https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15' )
37 | 
38 |     assert_equal 'atom', feed.format
39 |     assert_equal 'https://groups.google.com/d/forum/beerdb', feed.url
40 |   end
41 | =end
42 | 
43 |   ## the dated "note was" comments record the values that were expected before the feed changed
44 |   def test_headius
45 |     feed = fetch_and_parse_feed( 'http://blog.headius.com/feed.xml' )
46 |     ## note was (2020/1): 'http://blog.headius.com/feeds/posts/default'
47 | 
48 |     assert_equal 'atom',    feed.format
49 |     assert_equal 'Jekyll',  feed.generator.name
50 |     ## note was (2020/1): 'Blogger'
51 |     
52 |     assert_equal 'Charles Oliver Nutter', feed.title
53 |     ## note was (2020/1): 'Headius', feed.title
54 |     assert_equal 'Java, Ruby, and JVM guy trying to make sense of it all', feed.summary  # aka subtitle in atom
55 |     ## note was (2020/1): 'Helping the JVM Into the 21st Century', feed.summary
56 |     assert_equal 'https://headius.github.io/', feed.url
57 |     ## note was (2020/1): 'http://blog.headius.com/'
58 |   end
59 | 
60 | end
61 | 


--------------------------------------------------------------------------------
/feedparser/test/test_attachments_live.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_attachments_live.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | ###
11 | ## note: needs to require oga gem (it's not required by default - it's a "soft" dependency)
12 | 
13 | require 'oga'
14 | 
15 | 
16 | 
17 | class TestAttachmentsLive < Minitest::Test
18 |   ## checks attachments (aka enclosures) on the first item of each feed; attachment and enclosure are asserted side by side
19 |   def test_atom_enclose
20 |     feed = fetch_and_parse_feed( 'http://www.lse.ac.uk/assets/richmedia/webFeeds/publicLecturesAndEvents_AtomAllMediaTypesLatest100.xml' )
21 | 
22 |     assert_equal 'audio/mpeg', feed.items.first.attachment.type
23 |     assert_equal 'audio/mpeg', feed.items.first.enclosure.type
24 | 
25 |     assert_equal true, feed.items.first.attachment?
26 |     assert_equal true, feed.items.first.enclosure?
27 |   end
28 |   ## youtube channel feed - expects title, url, description and a sized (480x360) thumbnail on the first attachment
29 |   def test_atom_media
30 |     feed = fetch_and_parse_feed( 'http://www.youtube.com/feeds/videos.xml?channel_id=UCZUT79WUUpZlZ-XMF7l4CFg' )
31 |     assert_equal true, feed.items.first.attachment?
32 |     assert feed.items.first.attachments.first.title
33 |     assert feed.items.first.attachments.first.url
34 |     assert feed.items.first.attachments.first.thumbnail
35 |     assert_instance_of FeedParser::Thumbnail, feed.items.first.attachments.first.thumbnail
36 |     assert feed.items.first.attachments.first.thumbnail.url
37 |     assert_equal 480, feed.items.first.attachments.first.thumbnail.width.to_i
38 |     assert_equal 360, feed.items.first.attachments.first.thumbnail.height.to_i
39 |     assert feed.items.first.attachments.first.description
40 |   end
41 | 
42 |   def test_rss_media
43 |     # tests an example RSS file from https://creator.amazon.com/documentation/ac/mrss.html. Note that unlike the Atom example, it
44 |     # does not put everything under media:group
45 |     testpath = File.join(File.expand_path(File.dirname(__FILE__)), 'media_rss_example.txt')
46 |     feed_rss = File.read( testpath )
47 |     feed = FeedParser::Parser.parse( feed_rss )
48 |     assert_equal true, feed.items.first.attachment?
49 |     assert feed.items.first.attachments.first.title
50 |     assert feed.items.first.attachments.first.url
51 |     assert feed.items.first.attachments.first.thumbnail
52 |     assert_instance_of FeedParser::Thumbnail, feed.items.first.attachments.first.thumbnail
53 |     assert feed.items.first.attachments.first.thumbnail.url
54 |     assert_nil feed.items.first.attachments.first.thumbnail.width    # media:thumbnail in the example file has a url attribute only
55 |     assert_nil feed.items.first.attachments.first.thumbnail.height   # (no width / height attributes)
56 |     assert feed.items.first.attachments.first.description
57 |   end
58 | 
59 |   def test_rss_enclosure
60 |     feed = fetch_and_parse_feed( 'http://www.radiofreesatan.com/category/featured/feed/' )
61 | 
62 |     assert_equal 'audio/mpeg', feed.items.first.attachment.type
63 |     assert_equal 'audio/mpeg', feed.items.first.enclosure.type
64 | 
65 |     assert_equal true, feed.items.first.attachment?
66 |     assert_equal true, feed.items.first.enclosure?
67 |   end
68 | 
69 | end
70 | 


--------------------------------------------------------------------------------
/feedparser/test/test_dates.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_dates.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | class TestDates < Minitest::Test
11 |   ## each rec is a pair: [ timestamp string, expected DateTime ]; the string gets parsed and compared with assert_equal
12 |   def test_iso8601   # used by atom, json feed
13 | 
14 |     recs = [
15 |       [ '2017-05-20T19:23:06Z',       DateTime.new(2017, 5,20,19,23, 6) ],       # from daringfireball.json
16 |       [ '2017-05-20T19:23:08Z',       DateTime.new(2017, 5,20,19,23, 8) ],
17 |       [ '2017-05-17T08:02:12-07:00',  DateTime.new(2017, 5,17, 8, 2,12,'-7') ],  # from jsonfeed.json
18 |       [ '2017-05-18T21:08:49+00:00',  DateTime.new(2017, 5,18,21, 8,49) ],       # from byparker.json
19 |       [ '2017-05-18T21:08:49.123+00:00', DateTime.new(2017, 5,18,21, 8,49.123) ],   ### try with usec e.g. 49.124
20 |       [ '2017-05-17T08:02:12.567-07:00', DateTime.new(2017, 5,17, 8, 2,12.567,'-7') ],
21 |     ]
22 |     ## NOTE(review): d.utc / d.usec below are not stdlib DateTime methods - presumably added by a library loaded via helper; confirm
23 |     recs.each do |rec|
24 |       d = DateTime.iso8601( rec[0] )
25 |       puts "class: #{d.class.name} - #{d.utc} (#{d.usec}) <= iso8601 #{rec[0]}"
26 |       pp d
27 |       assert_equal rec[1], d
28 |     end
29 |   end   # test_iso8601
30 | 
31 | 
32 |   def test_rfc822   # used by rss 2.0
33 | 
34 |     recs = [
35 |       [ 'Sat, 17 Jan 2015 11:57:47 +0000', DateTime.new( 2015, 1,17,11,57,47) ],  # from sitepoint.rss2
36 |       [ 'Thu, 15 Jan 2015 15:00:56 +0000', DateTime.new( 2015, 1,15,15,00,56) ],
37 |       [ 'Fri, 16 Jan 2015 17:33:47 +0100', DateTime.new( 2015, 1,16,17,33,47,'+1') ],  # from rubyflow.rss2
38 |       [ 'Fri, 16 Jan 2015 09:33:57 +0100', DateTime.new( 2015, 1,16, 9,33,57,'+1') ],
39 | 	    [ 'Wed, 17 Dec 2014 12:30:48 +0000', DateTime.new( 2014,12,17,12,30,48) ],  # from rubymine.rss2
40 |     ]
41 |     ## same debug print + compare loop as in test_iso8601, but parsing with DateTime.rfc822
42 |     recs.each do |rec|
43 |       d = DateTime.rfc822( rec[0] )
44 |       puts "class: #{d.class.name} - #{d.utc} (#{d.usec}) <= rfc822 #{rec[0]}"
45 |       pp d
46 |       assert_equal rec[1], d
47 |     end
48 | 
49 |   end  # test_rfc822
50 | 
51 | 
52 | end # class TestDates
53 | 


--------------------------------------------------------------------------------
/feedparser/test/test_microformats.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_microformats.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
11 | ###
12 | ## note: needs to require microformats gem (it's not required by default)
13 | 
14 | require 'microformats'
15 | 
16 | 
17 | 
18 | class TestMicroformats < Minitest::Test
19 |   ## parses an inline html snippet marked up with h-entry classes and checks the single resulting feed item
20 |   def test_hentry
21 |     ## sample markup: one h-entry with name, author (h-card), published date, summary and content
22 | text =<<HTML
23 | <article class="h-entry">
24 |   <h1 class="p-name">Microformats are amazing</h1>
25 |   <p>Published by
26 |     <a class="p-author h-card" href="http://example.com">W. Developer</a>
27 |      on <time class="dt-published" datetime="2013-06-13 12:00:00">13<sup>th</sup> June 2013</time>
28 | 
29 |   <p class="p-summary">In which I extoll the virtues of using microformats.</p>
30 | 
31 |   <div class="e-content">
32 |     <p>Blah blah blah</p>
33 |   </div>
34 | </article>
35 | HTML
36 |     ## note: the text is passed in as-is; the 'html' format is asserted on the result below
37 |     feed = FeedParser::Parser.parse( text )
38 | 
39 |     assert_equal  'html', feed.format
40 |     assert_equal  1, feed.items.size
41 |     assert_equal  1, feed.items[0].authors.size
42 |     assert_equal  '<p>Blah blah blah</p>', feed.items[0].content_html
43 |     assert_equal  'Blah blah blah', feed.items[0].content_text
44 |     assert_equal  'Microformats are amazing', feed.items[0].title
45 |     assert_equal  'In which I extoll the virtues of using microformats.', feed.items[0].summary
46 |     assert_equal DateTime.new( 2013, 6, 13, 12, 0, 0 ).utc, feed.items[0].published   # NOTE(review): DateTime#utc is not stdlib - confirm where it is defined
47 | 
48 |     assert_equal  'W. Developer', feed.items[0].authors[0].name
49 |   end
50 | 
51 | 
52 | end  # class TestMicroformats
53 | 


--------------------------------------------------------------------------------
/feedparser/test/test_rss_live.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_rss_live.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | class TestRssLive < Minitest::Test
10 |   ## each test calls fetch_and_parse_feed (not defined in this file - presumably in test/helper) with a feed url
11 |   ## and only checks that the parsed feed reports the format 'rss 2.0'
12 |   def test_rubyflow
13 |     feed = fetch_and_parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' )
14 | 
15 |     assert_equal 'rss 2.0', feed.format
16 |   end
17 | 
18 |   def test_sitepointruby
19 |     feed = fetch_and_parse_feed( 'http://www.sitepoint.com/ruby/feed/' )
20 | 
21 |     assert_equal 'rss 2.0', feed.format
22 |   end
23 | 
24 |   def test_lambdatheultimate
25 |     ## check - has no item.guid - will use item.link for guid
26 |     feed = fetch_and_parse_feed( 'http://lambda-the-ultimate.org/rss.xml' )
27 | 
28 |     assert_equal 'rss 2.0', feed.format
29 |   end
30 | 
31 |   def test_rubymine
32 |     # includes item/content:encoded
33 |     feed = fetch_and_parse_feed( 'http://feeds.feedburner.com/jetbrains_rubymine?format=xml' )
34 | 
35 |     assert_equal 'rss 2.0', feed.format
36 |   end
37 | 
38 | end
39 | 


--------------------------------------------------------------------------------
/feeds/NOTES.md:
--------------------------------------------------------------------------------
 1 | # Notes
 2 | 
 3 | 
 4 | ## Todos
 5 | 
 6 | in news/washingtonpost-blogs-innovations.rss:
 7 | 
 8 | check:
 9 | 
10 | ```
11 | assert in ./news/washingtonpost-blogs-innovations.rss: feed.items[0].title  ==  "Google's AlphaGo beats the world's best Go player - again".
12 | --- expected
13 | +++ actual
14 | @@ -1,2 +1,2 @@
15 |  # encoding: UTF-8
16 | -"Google\u2019s AlphaGo beats the world\u2019s best Go player - again"
17 | +"Google\u2019s AlphaGo beats the world\u2019s best Go player \u2014 again"
18 | 
19 | ## feed.items[0].title:       Google’s AlphaGo beats the world’s best Go player — again
20 | 
21 | => fix reader - do NOT (auto-)convert dashes!!!!!!
22 | ```
23 | 
24 | 
25 | add support for multi-line with (preserved) newlines:
26 | 
27 | ```
28 | <description><![CDATA[
29 | <div>
30 | <a href="http://www.washingtonpost.com/blogs/innovations/wp/2017/05/26/googles-alphago-beats-the-worlds-best-go-player-again/" title="Google&#039;s AlphaGo beats the world&#039;s best Go player -- again"><img title="Google&#039;s AlphaGo beats the world&#039;s best Go player -- again" src="http://www.washingtonpost.com/rf/image_960w/2010-2019/WashingtonPost/2017/05/25/KidsPost/Images/AFP_OW1JP.jpg" alt="Google&#039;s AlphaGo beats the world&#039;s best Go player -- again" style="maxwidth: ; maxheight: ;" /></a>
31 | </div>
32 | <br/>
33 | AI: 2, Humanity: 0. A computer designed by Google researchers has beaten the world&#8217;s top Go player for the second game in a row, capturing the best-of-three match in Wuzhen, China, and confirming AI&#8217;s supremacy in what many consider as one of humanity&#8217;s most complex boardgames. Ke Jie, a 19-year old Go grandmaster, began the [&#8230;]]]></description>
34 | ```
35 | 
36 | 
37 | ---
38 | 
39 | in news/nytimes.rss
40 | 
41 | check
42 | 
43 | ```
44 | ##  todo: how to check for empty description - use empty string (or use nil) ???
45 | ## <description/>
46 | >>> pp feed.description
47 | ```
48 | 


--------------------------------------------------------------------------------
/feeds/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Tests, Tests, Tests
 3 | 
 4 | Feeds (in Atom, RSS, JSON Feed, HTML h-entry, etc.) with Test Assertions.
 5 | 
 6 | 
 7 | ## Usage
 8 | 
 9 | How does it work?
10 | 
11 | Use the triple-dash (i.e. `---`) on its own line to separate
12 | the feed source from all test assertions. Example:
13 | 
14 | 
15 | ```
16 | {
17 |   "version": "https://jsonfeed.org/version/1",
18 |   "title": "JSON Feed",
19 |   "description": "JSON Feed is a pragmatic syndication format for blogs, microblogs, and other time-based content.",
20 |   "home_page_url": "https://jsonfeed.org/",
21 |   "feed_url": "https://jsonfeed.org/feed.json",
22 |   "user_comment": "This feed allows you to read the posts...",
23 |   "favicon": "https://jsonfeed.org/graphics/icon.png",
24 |   "author": {
25 |     "name": "Brent Simmons and Manton Reece"
26 |   },
27 |   "items": [
28 |     {
29 |       "id": "https://jsonfeed.org/2017/05/17/announcing_json_feed",
30 |       "url": "https://jsonfeed.org/2017/05/17/announcing_json_feed",
31 |       "title": "Announcing JSON Feed",
32 |       "content_html": "<p>We — Manton Reece and Brent Simmons — have noticed that JSON has become the developers’ choice for APIs,...",
33 |       "date_published": "2017-05-17T08:02:12-07:00"
34 |     }
35 |   ]
36 | }
37 | 
38 | ---
39 | 
40 | feed.format:     json
41 | feed.title:      JSON Feed
42 | feed.url:        https://jsonfeed.org/
43 | feed.feed_url:   https://jsonfeed.org/feed.json
44 | feed.summary:    JSON Feed is a pragmatic syndication format for blogs, microblogs, and other time-based content.
45 | 
46 | feed.authors[0].name: Brent Simmons and Manton Reece
47 | 
48 | feed.items[0].title:     Announcing JSON Feed
49 | feed.items[0].url:       https://jsonfeed.org/2017/05/17/announcing_json_feed
50 | feed.items[0].id:        https://jsonfeed.org/2017/05/17/announcing_json_feed
51 | feed.items[0].published_local: >>> DateTime.new( 2017, 5, 17, 8, 2, 12, '-7' )
52 | feed.items[0].published:       >>> DateTime.new( 2017, 5, 17, 8, 2, 12, '-7' ).utc
53 | ```
54 | 
55 | 
56 | ## Run Tests
57 | 
58 | Use
59 | 
60 | ```
61 | ruby -I ./test test/test_feeds.rb
62 | ```
63 | 
64 | to run a selected / individual test, or
65 | to run all tests use
66 | 
67 | ```
68 | rake          # or
69 | rake test
70 | ```
71 | 
72 | 
73 | Resulting in:
74 | 
75 | ```
76 | reading ./spec/rss/creator.rss ...
77 | [debug] using stdlib rss/0.2.7
78 | [debug] Parsing feed in xml...
79 | [debug]   feed.class=RSS::Rss
80 | [debug]   rss | feed.version  >2.0<
81 | [debug]   rss | feed.title  >Test Dublin Core< : String
82 | [debug]   rss | feed.description => summary  >< : String
83 | [debug]   rss | feed.lastBuildDate => updated  >Mon, 29 May 2017 20:51:30 +0200< : Time
84 | [debug]   rss | feed.pubDate => published  >< : NilClass
85 | eval assert_equal %{Peter Baker}, feed.items[0].authors[0].to_s
86 | eval assert_equal %{Peter Baker}, feed.items[0].author.text
87 | eval assert_equal %{Peter Baker}, feed.items[0].author.to_s
88 | eval assert_equal nil, feed.items[0].author.email
89 | ...
90 | 
91 | Finished in 5.104933s, 0.1959 runs/s, 79.1391 assertions/s.
92 | 
93 | 1 runs, 404 assertions, 0 failures, 0 errors, 0 skips
94 | ```
95 | 


--------------------------------------------------------------------------------
/feeds/Rakefile:
--------------------------------------------------------------------------------
 1 | ###
 2 | ## use TestTask from Rake for setting up testing
 3 | ##   see https://docs.ruby-lang.org/en/2.1.0/Rake/TestTask.html
 4 | 
 5 | require 'rake/testtask'
 6 | 
 7 | Rake::TestTask.new do |t|
 8 |   t.test_files = FileList['test/**/test_*.rb']
 9 |   t.libs = ['test']    ## (auto-)add to load path
10 | end
11 | 
12 | ## make test the default task, that is, a plain `rake` is the same as `rake test`
13 | task :default => :test
14 | 


--------------------------------------------------------------------------------
/feeds/books/pragprog.rss:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/"
  3 |                    xmlns:atom="http://www.w3.org/2005/Atom"
  4 |                    xmlns:access="http://www.bloglines.com/about/specs/fac-1.0">
  5 |   <access:restriction relationship="deny"/>
  6 |   <channel>
  7 |     <atom:link href="https://pragprog.com/feed/global" rel="self" type="application/rss+xml"/>
  8 |     <title>Pragmatic Bookshelf</title>
  9 |     <link>https://pragprog.com/</link>
 10 |     <description>Up-to-date information about the Pragmatic Bookshelf</description>
 11 |     <language>en-us</language>
 12 |     <pubDate>Sat, 27 May 2017 17:18:55 +0000</pubDate>
 13 |     <item>
 14 |       <title>Python Testing with pytest</title>
 15 |       <description>&lt;div id="cms-content"&gt;
 16 | 
 17 |       &lt;/div&gt;
 18 |      </description>
 19 |       <pubDate>Tue, 23 May 2017 18:35:00 +0000</pubDate>
 20 |       <guid>http://pragprog.com/news/python-testing-with-pytest?3639225</guid>
 21 |       <link>http://pragprog.com/news/python-testing-with-pytest</link>
 22 |       <category>news</category>
 23 |     </item>
 24 |     <item>
 25 |       <title>Agile Web Development with Rails 5.1, Upgrade Details</title>
 26 |       <description>&lt;div id="cms-content"&gt;
 27 | 
 28 |       &lt;/div&gt;
 29 |      </description>
 30 |       <pubDate>Wed, 10 May 2017 14:32:55 +0000</pubDate>
 31 |       <guid>http://pragprog.com/news/agile-web-development-with-rails-5-1-upgrade-details?3629250</guid>
 32 |       <link>http://pragprog.com/news/agile-web-development-with-rails-5-1-upgrade-details</link>
 33 |       <category>news</category>
 34 |     </item>
 35 |     <item>
 36 |       <title>Surprise Sale: 40% Off this week only</title>
 37 |       <description>&lt;div id="cms-content"&gt;
 38 | 
 39 |       &lt;/div&gt;
 40 |      </description>
 41 |       <pubDate>Wed, 26 Apr 2017 13:20:02 +0000</pubDate>
 42 |       <guid>http://pragprog.com/news/surprise-sale-40-off-this-week-only?3619134</guid>
 43 |       <link>http://pragprog.com/news/surprise-sale-40-off-this-week-only</link>
 44 |       <category>news</category>
 45 |     </item>
 46 |     <item>
 47 |       <title>Now in print: iOS 10 SDK Development: Creating iPhone and iPad Apps with Swift</title>
 48 |       <description>&lt;div id="cms-content"&gt;
 49 | 
 50 |       &lt;/div&gt;
 51 |      </description>
 52 |       <pubDate>Tue, 28 Mar 2017 13:54:09 +0000</pubDate>
 53 |       <guid>http://pragprog.com/news/now-in-print-ios-10-sdk-development-creating-iphone-and-ipad-apps-with-swift?3607075</guid>
 54 |       <link>http://pragprog.com/news/now-in-print-ios-10-sdk-development-creating-iphone-and-ipad-apps-with-swift</link>
 55 |       <category>news</category>
 56 |     </item>
 57 |     <item>
 58 |       <title>Functional Web Development with Elixir, OTP, and Phoenix</title>
 59 |       <description>&lt;div id="cms-content"&gt;
 60 | 
 61 |       &lt;/div&gt;
 62 |      </description>
 63 |       <pubDate>Tue, 21 Mar 2017 14:48:25 +0000</pubDate>
 64 |       <guid>http://pragprog.com/news/functional-web-development-with-elixir-otp-and-phoenix?3602607</guid>
 65 |       <link>http://pragprog.com/news/functional-web-development-with-elixir-otp-and-phoenix</link>
 66 |       <category>news</category>
 67 |     </item>
 68 |     <item>
 69 |       <title>A Common-Sense Guide to Data Structures and Algorithms</title>
 70 |       <description>&lt;div id="cms-content"&gt;
 71 | 
 72 |       &lt;/div&gt;
 73 |      </description>
 74 |       <pubDate>Tue, 14 Mar 2017 17:32:21 +0000</pubDate>
 75 |       <guid>http://pragprog.com/news/a-common-sense-guide-to-data-structures-and-algorithms?3600282</guid>
 76 |       <link>http://pragprog.com/news/a-common-sense-guide-to-data-structures-and-algorithms</link>
 77 |       <category>news</category>
 78 |     </item>
 79 |     <item>
 80 |       <title>Design It! From Programmer to Software Architect</title>
 81 |       <description>&lt;div id="cms-content"&gt;
 82 | 
 83 |       &lt;/div&gt;
 84 |      </description>
 85 |       <pubDate>Wed, 08 Mar 2017 14:30:18 +0000</pubDate>
 86 |       <guid>http://pragprog.com/news/design-it-from-programmer-to-software-architect?3597202</guid>
 87 |       <link>http://pragprog.com/news/design-it-from-programmer-to-software-architect</link>
 88 |       <category>news</category>
 89 |     </item>
 90 |     <item>
 91 |       <title>Programming Elixir 1.3 in print</title>
 92 |       <description>&lt;div id="cms-content"&gt;
 93 | 
 94 |       &lt;/div&gt;
 95 |      </description>
 96 |       <pubDate>Sun, 19 Feb 2017 14:35:41 +0000</pubDate>
 97 |       <guid>http://pragprog.com/news/programming-elixir-1-3-in-print?3593675</guid>
 98 |       <link>http://pragprog.com/news/programming-elixir-1-3-in-print</link>
 99 |       <category>news</category>
100 |     </item>
101 |     <item>
102 |       <title>The Cucumber Book, Second Edition for Rails 5</title>
103 |       <description>&lt;div id="cms-content"&gt;
104 | 
105 |       &lt;/div&gt;
106 |      </description>
107 |       <pubDate>Sun, 19 Feb 2017 14:31:11 +0000</pubDate>
108 |       <guid>http://pragprog.com/news/the-cucumber-book-second-edition-for-rails-5?3593671</guid>
109 |       <link>http://pragprog.com/news/the-cucumber-book-second-edition-for-rails-5</link>
110 |       <category>news</category>
111 |     </item>
112 |     <item>
113 |       <title>Practical Vim for Vim 8 and Core Data for Swift 3</title>
114 |       <description>&lt;div id="cms-content"&gt;
115 | 
116 |       &lt;/div&gt;
117 |      </description>
118 |       <pubDate>Sun, 19 Feb 2017 14:29:19 +0000</pubDate>
119 |       <guid>http://pragprog.com/news/practical-vim-for-vim-8-and-core-data-for-swift-3?3593667</guid>
120 |       <link>http://pragprog.com/news/practical-vim-for-vim-8-and-core-data-for-swift-3</link>
121 |       <category>news</category>
122 |     </item>
123 |   </channel>
124 | </rss>
125 | 
126 | ---
127 | 
128 | feed.format:      rss 2.0
129 | feed.title:       Pragmatic Bookshelf
130 | feed.url:         https://pragprog.com/
131 | feed.description: Up-to-date information about the Pragmatic Bookshelf
132 | feed.published:   >>> DateTime.new( 2017, 5, 27, 17, 18, 55 )
133 | 
134 | 
135 | feed.items.size:    >>> 10
136 | 
137 | feed.items[0].title:     Python Testing with pytest
138 | feed.items[0].url:       http://pragprog.com/news/python-testing-with-pytest
139 | feed.items[0].guid:      http://pragprog.com/news/python-testing-with-pytest?3639225
140 | feed.items[0].published: >>> DateTime.new( 2017, 5, 23, 18, 35 )
141 | 
142 | feed.items[1].title:     Agile Web Development with Rails 5.1, Upgrade Details
143 | feed.items[1].url:       http://pragprog.com/news/agile-web-development-with-rails-5-1-upgrade-details
144 | feed.items[1].guid:      http://pragprog.com/news/agile-web-development-with-rails-5-1-upgrade-details?3629250
145 | feed.items[1].published: >>> DateTime.new( 2017, 5, 10, 14, 32, 55 )
146 | 
147 | 
148 | >>> pp feed.items[0].description
149 | 


--------------------------------------------------------------------------------
/feeds/comics/xkcd.atom:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
 3 |   <title>xkcd.com</title>
 4 |   <link href="https://xkcd.com/" rel="alternate"></link>
 5 |   <id>https://xkcd.com/</id>
 6 |   <updated>2017-05-22T00:00:00Z</updated>
 7 |   <entry>
 8 |     <title>Genetic Testing Results</title>
 9 |     <link href="https://xkcd.com/1840/" rel="alternate"></link>
10 |     <updated>2017-05-22T00:00:00Z</updated>
11 |     <id>https://xkcd.com/1840/</id>
12 |     <summary type="html">&lt;img src="https://imgs.xkcd.com/comics/genetic_testing_results.png" title="That's very exciting! The bad news is that it's a risk factor for a lot of things." alt="That's very exciting! The bad news is that it's a risk factor for a lot of things." /&gt;</summary>
13 |   </entry>
14 |   <entry>
15 |     <title>Doctor Visit</title>
16 |     <link href="https://xkcd.com/1839/" rel="alternate"></link>
17 |     <updated>2017-05-19T00:00:00Z</updated>
18 |     <id>https://xkcd.com/1839/</id>
19 |     <summary type="html">&lt;img src="https://imgs.xkcd.com/comics/doctor_visit.png" title="According to these blood tests, you're like 30% cereal." alt="According to these blood tests, you're like 30% cereal." /&gt;</summary>
20 |   </entry>
21 |   <entry>
22 |     <title>Machine Learning</title>
23 |     <link href="https://xkcd.com/1838/" rel="alternate"></link>
24 |     <updated>2017-05-17T00:00:00Z</updated>
25 |     <id>https://xkcd.com/1838/</id>
26 |     <summary type="html">&lt;img src="https://imgs.xkcd.com/comics/machine_learning.png" title="The pile gets soaked with data and starts to get mushy over time, so it's technically recurrent." alt="The pile gets soaked with data and starts to get mushy over time, so it's technically recurrent." /&gt;</summary>
27 |   </entry>
28 |   <entry>
29 |     <title>Rental Car</title>
30 |     <link href="https://xkcd.com/1837/" rel="alternate"></link>
31 |     <updated>2017-05-15T00:00:00Z</updated>
32 |     <id>https://xkcd.com/1837/</id>
33 |     <summary type="html">&lt;img src="https://imgs.xkcd.com/comics/rental_car.png" title="Technically, both cars are haunted, but the murder ghosts can't stand listening to the broken GPS for more than a few minutes." alt="Technically, both cars are haunted, but the murder ghosts can't stand listening to the broken GPS for more than a few minutes." /&gt;</summary>
34 |   </entry>
35 | </feed>
36 | 
37 | ---
38 | 
39 | feed.format:        atom
40 | feed.title:         xkcd.com
41 | feed.url:           https://xkcd.com/
42 | feed.updated:       >>> DateTime.new( 2017, 5, 22 )
43 | feed.items.size:    >>> 4
44 | 
45 | feed.items[0].title:   Genetic Testing Results
46 | feed.items[0].url:     https://xkcd.com/1840/
47 | feed.items[0].guid:    https://xkcd.com/1840/
48 | feed.items[0].updated: >>> DateTime.new( 2017, 5, 22 )
49 | 


--------------------------------------------------------------------------------
/feeds/comics/xkcd.rss:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <rss version="2.0">
 3 |   <channel>
 4 |     <title>xkcd.com</title>
 5 |     <link>https://xkcd.com/</link>
 6 |     <description>xkcd.com: A webcomic of romance and math humor.</description>
 7 |     <language>en</language>
 8 |     <item>
 9 |       <title>Genetic Testing Results</title>
10 |       <link>https://xkcd.com/1840/</link>
11 |       <description>&lt;img src="https://imgs.xkcd.com/comics/genetic_testing_results.png" title="That's very exciting! The bad news is that it's a risk factor for a lot of things." alt="That's very exciting! The bad news is that it's a risk factor for a lot of things." /&gt;</description>
12 |       <pubDate>Mon, 22 May 2017 04:00:00 -0000</pubDate>
13 |       <guid>https://xkcd.com/1840/</guid>
14 |     </item>
15 |     <item>
16 |       <title>Doctor Visit</title>
17 |       <link>https://xkcd.com/1839/</link>
18 |       <description>&lt;img src="https://imgs.xkcd.com/comics/doctor_visit.png" title="According to these blood tests, you're like 30% cereal." alt="According to these blood tests, you're like 30% cereal." /&gt;</description>
19 |       <pubDate>Fri, 19 May 2017 04:00:00 -0000</pubDate>
20 |       <guid>https://xkcd.com/1839/</guid>
21 |     </item>
22 |     <item>
23 |       <title>Machine Learning</title>
24 |       <link>https://xkcd.com/1838/</link>
25 |       <description>&lt;img src="https://imgs.xkcd.com/comics/machine_learning.png" title="The pile gets soaked with data and starts to get mushy over time, so it's technically recurrent." alt="The pile gets soaked with data and starts to get mushy over time, so it's technically recurrent." /&gt;</description>
26 |       <pubDate>Wed, 17 May 2017 04:00:00 -0000</pubDate>
27 |       <guid>https://xkcd.com/1838/</guid>
28 |     </item>
29 |     <item>
30 |       <title>Rental Car</title>
31 |       <link>https://xkcd.com/1837/</link>
32 |       <description>&lt;img src="https://imgs.xkcd.com/comics/rental_car.png" title="Technically, both cars are haunted, but the murder ghosts can't stand listening to the broken GPS for more than a few minutes." alt="Technically, both cars are haunted, but the murder ghosts can't stand listening to the broken GPS for more than a few minutes." /&gt;</description>
33 |       <pubDate>Mon, 15 May 2017 04:00:00 -0000</pubDate>
34 |       <guid>https://xkcd.com/1837/</guid>
35 |     </item>
36 |   </channel>
37 | </rss>
38 | 
39 | ---
40 | 
41 | feed.format:        rss 2.0
42 | feed.title:         xkcd.com
43 | feed.description:   xkcd.com: A webcomic of romance and math humor.
44 | feed.url:           https://xkcd.com/
45 | feed.items.size:    >>> 4
46 | 
47 | feed.items[0].title:       Genetic Testing Results
48 | feed.items[0].url:         https://xkcd.com/1840/
49 | feed.items[0].guid:        https://xkcd.com/1840/
50 | feed.items[0].published:   >>> DateTime.new( 2017, 5, 22, 4, 0 )
51 | feed.items[0].description: <img src="https://imgs.xkcd.com/comics/genetic_testing_results.png" title="That's very exciting! The bad news is that it's a risk factor for a lot of things." alt="That's very exciting! The bad news is that it's a risk factor for a lot of things." />
52 | 
53 | 
54 | feed.items[1].title:     Doctor Visit
55 | feed.items[1].url:       https://xkcd.com/1839/
56 | feed.items[1].guid:      https://xkcd.com/1839/
57 | feed.items[1].published: >>> DateTime.new( 2017, 5, 19, 4, 0 )
58 | 
59 | >>> pp feed.items[0].description
60 | 


--------------------------------------------------------------------------------
/feeds/misc/googlegroups.atom:
--------------------------------------------------------------------------------
 1 | <feed xmlns="http://www.w3.org/2005/Atom">
 2 |   <id>https://groups.google.com/d/forum/beerdb</id>
 3 |   <title type="text">Open Beer &amp; Brewery Database (beer.db)</title>
 4 |   <subtitle>Free open public domain beer database &amp;amp; schema (beer.db) for use in any (programming) language (e.g. uses plain text fixtures/data sets). Questions? Comments?</subtitle>
 5 |   <link rel="self" href="https://groups.google.com/forum/feed/beerdb/topics/atom_v1_0.xml" title="beerdb feed"></link>
 6 |   <updated></updated>
 7 |   <generator>Google Groups</generator>
 8 |   <entry>
 9 |     <author>
10 |       <name>Joe Sixpack</name>
11 |     </author>
12 |     <updated>2014-12-17T11:54:43Z</updated>
13 |     <id>https://groups.google.com/d/topic/beerdb/KpQOUDYJ3J8</id>
14 |     <link href="https://groups.google.com/d/topic/beerdb/KpQOUDYJ3J8"></link>
15 |     <title type="text">Planet Beer (Austria, Belgium) - Feeds Incl. Craft Fest Wien, Beer-A-Day, proBier n Friends</title>
16 |     <summary type="html">Hello, I&apos;ve started putting together a planet site for beer, that is, Planet Beer [1]. The first feed lists include: - Austria [2] - Belgium [3] You&apos;re welcome and invited to suggest new countries and feeds. Cheers. Prost. [1] http://planetbeer.herokuapp.com [2] http://github.com/openbeer/planet/blob/master/</summary>
17 |   </entry>
18 | </feed>
19 | 
20 | ---
21 | 
22 | feed.format:    atom
23 | feed.title:     Open Beer & Brewery Database (beer.db)
24 | feed.url:       https://groups.google.com/d/forum/beerdb
25 | 
26 | feed.generator.name: Google Groups
27 | 
28 | feed.items[0].title: Planet Beer (Austria, Belgium) - Feeds Incl. Craft Fest Wien, Beer-A-Day, proBier n Friends
29 | feed.items[0].url:   https://groups.google.com/d/topic/beerdb/KpQOUDYJ3J8
30 | 
31 | 
32 | ### todo: fix: &amp;amp;  => &amp;  -> always assume plain text? (by default) - auto-escape xml entities??
33 | feed.summary: Free open public domain beer database &amp; schema (beer.db) for use in any (programming) language (e.g. uses plain text fixtures/data sets). Questions? Comments?
34 | 
35 | ### todo: add checks for datetimes (compare via to_s ??) - note: feed-level <updated> is empty in this fixture
36 | ## feed.updated.to_s:  2014-12-31T15:33:00+00:00
37 | ## feed.items[0].updated.to_s: 2014-12-17T11:54:43+00:00
38 | 


--------------------------------------------------------------------------------
/feeds/misc/googlegroups2.atom:
--------------------------------------------------------------------------------
 1 | <feed xmlns="http://www.w3.org/2005/Atom">
 2 |   <id>https://groups.google.com/d/forum/beerdb</id>
 3 |   <title type="text">Open Beer &amp; Brewery Database (beer.db)</title>
 4 |   <subtitle>Free open public domain beer database &amp;amp; schema (beer.db) for use in any (programming) language (e.g. uses plain text fixtures/data sets). Questions? Comments?</subtitle>
 5 |   <link rel="self" href="https://groups.google.com/forum/feed/beerdb/topics/atom_v1_0.xml" title="beerdb feed"></link>
 6 |   <updated></updated>
 7 |   <generator>
 8 |      Google Groups (w/ leading n trailing newlines stripped)
 9 |   </generator>
10 |   <entry>
11 |     <author>
12 |       <name>Joe Sixpack</name>
13 |     </author>
14 |     <updated>2014-12-17T11:54:43Z</updated>
15 |     <id>https://groups.google.com/d/topic/beerdb/KpQOUDYJ3J8</id>
16 |     <link href="https://groups.google.com/d/topic/beerdb/KpQOUDYJ3J8"></link>
17 |     <title type="text">Planet Beer (Austria, Belgium) - Feeds Incl. Craft Fest Wien, Beer-A-Day, proBier n Friends</title>
18 |     <summary type="html">Hello, I&apos;ve started putting together a planet site for beer, that is, Planet Beer [1]. The first feed lists include: - Austria [2] - Belgium [3] You&apos;re welcome and invited to suggest new countries and feeds. Cheers. Prost. [1] http://planetbeer.herokuapp.com [2] http://github.com/openbeer/planet/blob/master/</summary>
19 |   </entry>
20 | </feed>
21 | 
22 | ---
23 | 
24 | feed.format:     atom
25 | feed.url:        https://groups.google.com/d/forum/beerdb
26 | 
27 | feed.generator.name:  Google Groups (w/ leading n trailing newlines stripped)
28 | 


--------------------------------------------------------------------------------
/feeds/misc/jsonfeed.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "https://jsonfeed.org/version/1",
 3 |   "title": "JSON Feed",
 4 |   "description": "JSON Feed is a pragmatic syndication format for blogs, microblogs, and other time-based content.",
 5 |   "home_page_url": "https://jsonfeed.org/",
 6 |   "feed_url": "https://jsonfeed.org/feed.json",
 7 |   "user_comment": "This feed allows you to read the posts from this site in any feed reader that supports the JSON Feed format. To add this feed to your reader, copy the following URL — https://jsonfeed.org/feed.json — and add it your reader.",
 8 |   "favicon": "https://jsonfeed.org/graphics/icon.png",
 9 |   "author": {
10 |     "name": "Brent Simmons and Manton Reece"
11 |   },
12 |   "items": [
13 |     {
14 |       "id": "https://jsonfeed.org/2017/05/17/announcing_json_feed",
15 |       "url": "https://jsonfeed.org/2017/05/17/announcing_json_feed",
16 |       "title": "Announcing JSON Feed",
17 |       "content_html": "<p>We — Manton Reece and Brent Simmons — have noticed that JSON has become the developers’ choice for APIs, and that developers will often go out of their way to avoid XML. JSON is simpler to read and write, and it’s less prone to bugs.</p>\n\n<p>So we developed JSON Feed, a format similar to <a href=\"http://cyber.harvard.edu/rss/rss.html\">RSS</a> and <a href=\"https://tools.ietf.org/html/rfc4287\">Atom</a> but in JSON. It reflects the lessons learned from our years of work reading and publishing feeds.</p>\n\n<p><a href=\"https://jsonfeed.org/version/1\">See the spec</a>. It’s at version 1, which may be the only version ever needed. If future versions are needed, version 1 feeds will still be valid feeds.</p>\n\n<h4>Notes</h4>\n\n<p>We have a <a href=\"https://github.com/manton/jsonfeed-wp\">WordPress plugin</a> and, coming soon, a JSON Feed Parser for Swift. As more code is written, by us and others, we’ll update the <a href=\"https://jsonfeed.org/code\">code</a> page.</p>\n\n<p>See <a href=\"https://jsonfeed.org/mappingrssandatom\">Mapping RSS and Atom to JSON Feed</a> for more on the similarities between the formats.</p>\n\n<p>This website — the Markdown files and supporting resources — <a href=\"https://github.com/brentsimmons/JSONFeed\">is up on GitHub</a>, and you’re welcome to comment there.</p>\n\n<p>This website is also a blog, and you can subscribe to the <a href=\"https://jsonfeed.org/xml/rss.xml\">RSS feed</a> or the <a href=\"https://jsonfeed.org/feed.json\">JSON feed</a> (if your reader supports it).</p>\n\n<p>We worked with a number of people on this over the course of several months. We list them, and thank them, at the bottom of the <a href=\"https://jsonfeed.org/version/1\">spec</a>. But — most importantly — <a href=\"http://furbo.org/\">Craig Hockenberry</a> spent a little time making it look pretty. :)</p>",
18 |       "date_published": "2017-05-17T08:02:12-07:00"
19 |     }
20 |   ]
21 | }
22 | 
23 | ---
24 | 
25 | feed.format:     json
26 | feed.title:      JSON Feed
27 | feed.url:        https://jsonfeed.org/
28 | feed.feed_url:   https://jsonfeed.org/feed.json
29 | feed.summary:    JSON Feed is a pragmatic syndication format for blogs, microblogs, and other time-based content.
30 | 
31 | feed.authors[0].name: Brent Simmons and Manton Reece
32 | 
33 | 
34 | feed.items[0].title:     Announcing JSON Feed
35 | feed.items[0].url:       https://jsonfeed.org/2017/05/17/announcing_json_feed
36 | feed.items[0].id:        https://jsonfeed.org/2017/05/17/announcing_json_feed
37 | feed.items[0].published: >>> DateTime.new( 2017, 5, 17, 8, 2, 12, '-7' ).utc
38 | 


--------------------------------------------------------------------------------
/feeds/news/nytimes-paul-krugman.rss:
--------------------------------------------------------------------------------
 1 | <rss version="2.0">
 2 |   <channel>
 3 |     <ttl>60</ttl>
 4 |     <title>Paul Krugman</title>
 5 |     <link>https://www.nytimes.com/column/paul-krugman</link>
 6 |     <description>Paul Krugman, a New York Times Op-Ed columnist, writes about macroeconomics, trade, health care, social policy and politics. In 2008, he received the Nobel Prize in Economics.</description>
 7 |     <copyright>Copyright 2017 The New York Times Company</copyright>
 8 |     <language>en-us</language>
 9 |     <item>
10 |       <title>Trump&#8217;s Energy, Low and Dirty</title>
11 |       <link>https://www.nytimes.com/2017/05/29/opinion/trump-g-7-summit-energy.html</link>
12 |       <description>Risking the planet to keep a lie alive.</description>
13 |       <author>By PAUL KRUGMAN</author>
14 |       <pubDate>Mon, 29 May 2017 03:21:09 -0500</pubDate>
15 |     </item>
16 |     <item>
17 |       <title>It&#8217;s All About Trump&#8217;s Contempt</title>
18 |       <link>https://www.nytimes.com/2017/05/26/opinion/trumpcare-cbo-federal-budget.html</link>
19 |       <description>His budget and health plan show he despises his voters. Will they notice?</description>
20 |       <author>By PAUL KRUGMAN</author>
21 |       <pubDate>Fri, 26 May 2017 03:21:23 -0500</pubDate>
22 |     </item>
23 |     <item>
24 |       <title>The Unfreeing of American Workers</title>
25 |       <link>https://www.nytimes.com/2017/05/22/opinion/american-workers-noncompete-agreements.html</link>
26 |       <description>Creeping along the real road to serfdom.</description>
27 |       <author>By PAUL KRUGMAN</author>
28 |       <pubDate>Mon, 22 May 2017 03:21:10 -0500</pubDate>
29 |     </item>
30 |     <item>
31 |       <title>What&#8217;s the Matter With Republicans?</title>
32 |       <link>https://www.nytimes.com/2017/05/19/opinion/whats-the-matter-with-republicans.html</link>
33 |       <description>We need to understand what made Trump possible.</description>
34 |       <author>By PAUL KRUGMAN</author>
35 |       <pubDate>Fri, 19 May 2017 03:21:08 -0500</pubDate>
36 |     </item>
37 |     <item>
38 |       <title>The Priming of Mr. Donald Trump</title>
39 |       <link>https://www.nytimes.com/2017/05/15/opinion/trump-tax-cuts-deficit.html</link>
40 |       <description>He&#8217;s not the only one with fiscal fantasies.</description>
41 |       <author>By PAUL KRUGMAN</author>
42 |       <pubDate>Mon, 15 May 2017 03:21:15 -0500</pubDate>
43 |     </item>
44 |     <item>
45 |       <title>Judas, Tax Cuts and the Great Betrayal</title>
46 |       <link>https://www.nytimes.com/2017/05/12/opinion/judas-tax-cuts-and-the-great-betrayal.html</link>
47 |       <description>The Republican response to Trump&#8217;s cover-up will live in infamy forever.</description>
48 |       <author>By PAUL KRUGMAN</author>
49 |       <pubDate>Fri, 12 May 2017 03:22:04 -0500</pubDate>
50 |     </item>
51 | 
52 |     <item><title>Republicans Party Like It&#8217;s 1984</title><link>https://www.nytimes.com/2017/05/08/opinion/republicans-party-like-its-1984.html</link><description>Making policy by lying about everything.</description><author>By PAUL KRUGMAN</author><pubDate>Mon, 08 May 2017 03:21:24 -0500</pubDate></item>
53 | 
54 |     <item><title>What&#8217;s the Matter With Europe?</title><link>https://www.nytimes.com/2017/05/05/opinion/european-union-france-election.html</link><description>Le Pen must be beaten, but then what?</description><author>By PAUL KRUGMAN</author><pubDate>Fri, 05 May 2017 03:21:24 -0500</pubDate></item>
55 | 
56 |     <item><title>On the Power of Being Awful</title><link>https://www.nytimes.com/2017/05/01/opinion/donald-trump-on-the-power-of-being-awful.html</link><description>Trump supporters will never admit they were wrong.</description><author>By PAUL KRUGMAN</author><pubDate>Mon, 01 May 2017 03:21:16 -0500</pubDate></item>
57 | 
58 |     <item><title>Living in the Trump Zone</title><link>https://www.nytimes.com/2017/04/28/opinion/living-in-the-trump-zone.html</link><description>We&#8217;re in a place and time where childish petulance drives policy.</description><author>By PAUL KRUGMAN</author><pubDate>Fri, 28 Apr 2017 03:21:22 -0500</pubDate></item>
59 |   </channel>
60 | </rss>
61 | 
62 | ---
63 | 
64 | feed.format:      rss 2.0
65 | feed.title:       Paul Krugman
66 | feed.url:         https://www.nytimes.com/column/paul-krugman
67 | feed.description: Paul Krugman, a New York Times Op-Ed columnist, writes about macroeconomics, trade, health care, social policy and politics. In 2008, he received the Nobel Prize in Economics.
68 | 
69 | feed.items[0].title:       Trump’s Energy, Low and Dirty
70 | feed.items[0].url:         https://www.nytimes.com/2017/05/29/opinion/trump-g-7-summit-energy.html
71 | feed.items[0].description: Risking the planet to keep a lie alive.
72 | feed.items[0].author.text: By PAUL KRUGMAN
73 | feed.items[0].published:   >>> DateTime.new( 2017, 5, 29, 3, 21, 9, '-5').utc
74 | 
75 | feed.items[1].title:       It’s All About Trump’s Contempt
76 | feed.items[1].url:         https://www.nytimes.com/2017/05/26/opinion/trumpcare-cbo-federal-budget.html
77 | feed.items[1].description: His budget and health plan show he despises his voters. Will they notice?
78 | feed.items[1].author.text: By PAUL KRUGMAN
79 | feed.items[1].published:   >>> DateTime.new( 2017, 5, 26, 3, 21,23, '-5').utc
80 | 


--------------------------------------------------------------------------------
/feeds/news/nytimes-thomas-l-friedman.rss:
--------------------------------------------------------------------------------
  1 | <rss version="2.0">
  2 |   <channel>
  3 |     <ttl>60</ttl>
  4 |     <title>Thomas L. Friedman</title>
  5 |     <link>https://www.nytimes.com/column/thomas-l-friedman</link>
  6 |     <description>Thomas L. Friedman, a New York Times Op-Ed columnist, writes about foreign affairs, globalization and technology.</description>
  7 |     <copyright>Copyright 2017 The New York Times Company</copyright>
  8 |     <language>en-us</language>
  9 |     <item>
 10 |       <title>A Road Trip Through Rusting and Rising America</title>
 11 |       <link>https://www.nytimes.com/2017/05/24/opinion/rusting-and-rising-america.html</link>
 12 |       <description>The comeback of distressed and lost communities is the story of Bill Clinton&#8217;s America, not Donald Trump&#8217;s.</description>
 13 |       <author>By THOMAS L. FRIEDMAN</author>
 14 |       <pubDate>Wed, 24 May 2017 03:21:08 -0500</pubDate>
 15 |     </item>
 16 |     <item>
 17 |       <title>It&#8217;s Chicken or Fish</title>
 18 |       <link>https://www.nytimes.com/2017/05/16/opinion/trump-republicans-2018-elections.html</link>
 19 |       <description>Elected Republicans won&#8217;t stand up to Trump&#8217;s abuse of power, like his asking James Comey to halt the Flynn-Russia inquiry. So now you have a choice.</description>
 20 |       <author>By THOMAS L. FRIEDMAN</author>
 21 |       <pubDate>Tue, 16 May 2017 19:41:47 -0500</pubDate>
 22 |     </item>
 23 |     <item>
 24 |       <title>Owning Your Own Future</title>
 25 |       <link>https://www.nytimes.com/2017/05/10/opinion/owning-your-own-future.html</link>
 26 |       <description>Stop learning and you could find yourself without a job.</description>
 27 |       <author>By THOMAS L. FRIEDMAN</author>
 28 |       <pubDate>Wed, 10 May 2017 03:21:01 -0500</pubDate>
 29 |     </item>
 30 |     <item>
 31 |       <title>Trump: Crazy Like a Fox, or Just Crazy?</title>
 32 |       <link>https://www.nytimes.com/2017/05/03/opinion/trump-crazy-like-a-fox-or-just-crazy.html</link>
 33 |       <description>The president&#8217;s remarks about his first 100 days have been simply bizarre.</description>
 34 |       <author>By THOMAS L. FRIEDMAN</author>
 35 |       <pubDate>Wed, 03 May 2017 03:21:08 -0500</pubDate>
 36 |     </item>
 37 |     <item>
 38 |       <title>On a Par 5 in Dubai, Good Humor and a Respite From All Things Trump</title>
 39 |       <link>https://www.nytimes.com/2017/04/26/opinion/on-a-par-5-in-dubai-good-humor-and-a-respite-from-all-things-trump.html</link>
 40 |       <description>Oh, and there&#8217;s this yogi with a flowing white beard and golf clothes.</description>
 41 |       <author>By THOMAS L. FRIEDMAN</author>
 42 |       <pubDate>Wed, 26 Apr 2017 03:21:15 -0500</pubDate>
 43 |     </item>
 44 |     <item>
 45 |       <title>Coal Museum Sees the Future; Trump Doesn&#8217;t</title>
 46 |       <link>https://www.nytimes.com/2017/04/19/opinion/coal-museum-sees-the-future-trump-doesnt.html</link>
 47 |       <description>The president is focused on prolonging a dying industry rather than on renewable energy, which is making the country stronger without him.</description>
 48 |       <author>By THOMAS L. FRIEDMAN</author>
 49 |       <pubDate>Wed, 19 Apr 2017 03:21:23 -0500</pubDate>
 50 |     </item>
 51 |     <item>
 52 |       <title>Why Is Trump Fighting ISIS in Syria?</title>
 53 |       <link>https://www.nytimes.com/2017/04/12/opinion/why-is-trump-fighting-isis-in-syria.html</link>
 54 |       <description>A strategy built on territory won&#8217;t ultimately eliminate the terrorist group.</description>
 55 |       <author>By THOMAS L. FRIEDMAN</author>
 56 |       <pubDate>Wed, 12 Apr 2017 03:21:13 -0500</pubDate>
 57 |     </item>
 58 |     <item>
 59 |       <title>President Trump&#8217;s Real-World Syria Lesson</title>
 60 |       <link>https://www.nytimes.com/2017/04/05/opinion/president-trumps-real-world-syria-lesson.html</link>
 61 |       <description>Doing nothing shouldn&#8217;t be an option.</description>
 62 |       <author>By THOMAS L. FRIEDMAN</author>
 63 |       <pubDate>Wed, 05 Apr 2017 03:21:22 -0500</pubDate>
 64 |     </item>
 65 |     <item>
 66 |       <title>Trump Is a Chinese Agent</title>
 67 |       <link>https://www.nytimes.com/2017/03/29/opinion/trump-is-a-chinese-agent.html</link>
 68 |       <description>Ignoring climate change and the benefits of clean energy only helps a rival.</description>
 69 |       <author>By THOMAS L. FRIEDMAN</author>
 70 |       <pubDate>Wed, 29 Mar 2017 03:21:26 -0500</pubDate>
 71 |     </item>
 72 |     <item>
 73 |       <title>Calling On a Few Good Men</title>
 74 |       <link>https://www.nytimes.com/2017/03/22/opinion/calling-on-a-few-good-men.html</link>
 75 |       <description>An open letter to the adults in the Trump administration with the most integrity.</description>
 76 |       <author>By THOMAS L. FRIEDMAN</author>
 77 |       <pubDate>Wed, 22 Mar 2017 03:21:26 -0500</pubDate>
 78 |     </item>
 79 |   </channel>
 80 | </rss>
 81 | 
 82 | ---
 83 | 
 84 | feed.format:      rss 2.0
 85 | feed.title:       Thomas L. Friedman
 86 | feed.url:         https://www.nytimes.com/column/thomas-l-friedman
 87 | feed.description: Thomas L. Friedman, a New York Times Op-Ed columnist, writes about foreign affairs, globalization and technology.
 88 | 
 89 | feed.items[0].title: A Road Trip Through Rusting and Rising America
 90 | feed.items[0].url:   https://www.nytimes.com/2017/05/24/opinion/rusting-and-rising-america.html
 91 | feed.items[0].description: [[
 92 |   The comeback of distressed and lost communities is the story of Bill Clinton’s America, not Donald Trump’s. ]]
 93 | feed.items[0].author.text: By THOMAS L. FRIEDMAN
 94 | feed.items[0].published:   >>> DateTime.new( 2017, 5, 24, 3, 21, 8, '-5' ).utc
 95 | 
 96 | feed.items[1].title:  It’s Chicken or Fish
 97 | feed.items[1].url:    https://www.nytimes.com/2017/05/16/opinion/trump-republicans-2018-elections.html
 98 | feed.items[1].description: [[
 99 |   Elected Republicans won’t stand up to Trump’s abuse of power,
100 |   like his asking James Comey to halt the Flynn-Russia inquiry. So now you have a choice. ]]
101 | feed.items[1].author.text: By THOMAS L. FRIEDMAN
102 | feed.items[1].published:   >>> DateTime.new( 2017, 5, 16, 19, 41, 47, '-5' ).utc
103 | 
104 | feed.items[2].title:       Owning Your Own Future
105 | feed.items[2].url:         https://www.nytimes.com/2017/05/10/opinion/owning-your-own-future.html
106 | feed.items[2].description: Stop learning and you could find yourself without a job.
107 | 


--------------------------------------------------------------------------------
/feeds/spec/atom/author.atom:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <feed xmlns="http://www.w3.org/2005/Atom">
 3 |   <title>Test Author</title>
 4 |   <link rel="alternate" type="text/html" href="http://example.com/" />
 5 |   <link rel="self" type="application/atom+xml" href="http://example.com/feed.xml" />
 6 |   <id>http://example.com</id>
 7 |   <updated>2017-05-23T19:36:44Z</updated>
 8 |   <author>
 9 | 		<name>John Jay Gruber</name>
10 | 		<uri>http://example.com/</uri>
11 | 	</author>
12 | 
13 |   <entry>
14 |   	<title>Feedbin, Too</title>
15 | 	  <link rel="alternate" type="text/html" href="https://feedbin.com/blog/2017/05/22/feedbin-supports-json-feed/" />
16 | 	  <id>tag:daringfireball.net,2017:/linked//6.33732</id>
17 | 	  <published>2017-05-23T18:50:36Z</published>
18 | 	  <updated>2017-05-23T19:03:33Z</updated>
19 | 	  <author>
20 | 		  <name>Max Gruber Jun.</name>
21 | 		  <uri>http://example.com/</uri>
22 | 	  </author>
23 |   	<content type="text">
24 |        Ben Ubois, announcing support for JSON Feed in Feedbin...
25 |     </content>
26 |   </entry>
27 | </feed>
28 | 
29 | ---
30 | 
31 | feed.authors.size:      >>> 1
32 | feed.authors[0].name:   John Jay Gruber
33 | feed.authors[0].url:	  http://example.com/
34 | feed.authors[0].email:	>>> nil
35 | 
36 | feed.author.name:  John Jay Gruber
37 | feed.author.url: 	 http://example.com/
38 | feed.author.email: >>> nil
39 | 
40 | 
41 | feed.items[0].authors.size:     >>> 1
42 | feed.items[0].authors[0].name:  Max Gruber Jun.
43 | feed.items[0].authors[0].url:	  http://example.com/
44 | feed.items[0].authors[0].email:	>>> nil
45 | 
46 | feed.items[0].author.name:   Max Gruber Jun.
47 | feed.items[0].author.url: 	 http://example.com/
48 | feed.items[0].author.email:  >>> nil
49 | 


--------------------------------------------------------------------------------
/feeds/spec/atom/authors.atom:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <feed xmlns="http://www.w3.org/2005/Atom">
 3 |   <title>Test Author</title>
 4 |   <link rel="alternate" type="text/html" href="http://example.com/" />
 5 |   <link rel="self" type="application/atom+xml" href="http://example.com/feed.xml" />
 6 |   <id>http://example.com</id>
 7 |   <updated>2017-05-23T19:36:44Z</updated>
 8 |   <author>
 9 | 		<name>John Jay Gruber</name>
10 | 		<uri>http://example.com/</uri>
11 | 	</author>
12 |   <author>
13 | 		<name>Frank Huber</name>
14 | 		<uri>http://example.com/frank/</uri>
15 | 	</author>
16 | 
17 |   <entry>
18 |   	<title>Feedbin, Too</title>
19 | 	  <link rel="alternate" type="text/html" href="https://feedbin.com/blog/2017/05/22/feedbin-supports-json-feed/" />
20 | 	  <id>tag:daringfireball.net,2017:/linked//6.33732</id>
21 | 	  <published>2017-05-23T18:50:36Z</published>
22 | 	  <updated>2017-05-23T19:03:33Z</updated>
23 | 	  <author>
24 | 		  <name>Max Gruber Jun.</name>
25 | 		  <uri>http://example.com/</uri>
26 | 	  </author>
27 |     <author>
28 |   		<name>Frank Huber</name>
29 |   		<uri>http://example.com/frank/</uri>
30 |       <email>frank@example.com</email>
31 |   	</author>
32 |     <author>
33 |   		<name>George Imker</name>
34 |   		<uri>http://example.com/george/</uri>
35 |   	</author>
36 |   	<content type="text">
37 |        Ben Ubois, announcing support for JSON Feed in Feedbin...
38 |     </content>
39 |   </entry>
40 | </feed>
41 | 
42 | ---
43 | 
44 | feed.authors.size:      >>> 2
45 | feed.authors[0].name:   John Jay Gruber
46 | feed.authors[0].url:	  http://example.com/
47 | feed.authors[0].email:	>>> nil
48 | feed.authors[1].name:   Frank Huber
49 | feed.authors[1].url:	  http://example.com/frank/
50 | 
51 | feed.author.name:  John Jay Gruber
52 | feed.author.url: 	 http://example.com/
53 | feed.author.email: >>> nil
54 | 
55 | 
56 | 
57 | feed.items[0].authors.size:     >>> 3
58 | feed.items[0].authors[0].name:  Max Gruber Jun.
59 | feed.items[0].authors[0].url:	  http://example.com/
60 | feed.items[0].authors[0].email:	>>> nil
61 | feed.items[0].authors[1].name:  Frank Huber
62 | feed.items[0].authors[1].url:	  http://example.com/frank/
63 | feed.items[0].authors[1].email:	frank@example.com
64 | feed.items[0].authors[2].name:  George Imker
65 | feed.items[0].authors[2].url:	  http://example.com/george/
66 | feed.items[0].authors[2].email:	>>> nil
67 | 
68 | feed.items[0].author.name:   Max Gruber Jun.
69 | feed.items[0].author.url: 	 http://example.com/
70 | feed.items[0].author.email:  >>> nil
71 | 


--------------------------------------------------------------------------------
/feeds/spec/atom/categories.atom:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <feed xmlns='http://www.w3.org/2005/Atom'>
 3 |   <title>Test Categories</title>
 4 |   <link rel="alternate" type="text/html" href="http://example.com/" />
 5 |   <link rel="self" type="application/atom+xml" href="http://example.com/feed.xml" />
 6 |   <id>http://example.com</id>
 7 |   <updated>2017-05-23T19:36:44Z</updated>
 8 | 
 9 |    <category term="jruby"/>
10 |    <category term="ruby"/>
11 |    <category term="java"/>
12 |    <category term="rails"/>
13 |    <category term="invokedynamic"/>
14 |    <category term="jvm"/>
15 |    <category term="application bundle"/>
16 |    <category term="compilation"/>
17 |    <category term="compiler"/>
18 |    <category term="dynamic dispatch"/>
19 |    <category term="dynamic languages"/>
20 |    <category term="ruby 2.0"/>
21 | 
22 |    <entry>
23 | 
24 |      <title>Rock Surprise</title>
25 |      <link href='https://www.tbray.org/ongoing/When/201x/2017/05/20/Rock-Surprise' />
26 |      <id>https://www.tbray.org/ongoing/When/201x/2017/05/20/Rock-Surprise</id>
27 |      <published>2017-05-20T12:00:00-07:00</published>
28 |      <updated>2017-05-21T11:13:00-07:00</updated>
29 | 
30 |      <category scheme='https://www.tbray.org/ongoing/What/' term='Arts/Music' />
31 |      <category scheme='https://www.tbray.org/ongoing/What/' term='Arts' />
32 |      <category scheme='https://www.tbray.org/ongoing/What/' term='Music' />
33 |      <category scheme='https://www.tbray.org/ongoing/What/' term='Arts/Photos' />
34 |      <category scheme='https://www.tbray.org/ongoing/What/' term='Photos' />
35 | 
36 |    	<content type="text">
37 |       On a recent Saturday we accidentally took in two very
38 |       different pop-music concerts...
39 |      </content>
40 |    </entry>
41 |  </feed>
42 | 
43 | ---
44 | 
45 | feed.tags.size:     >>> 12
46 | feed.tags[0].name:  jruby
47 | feed.tags[1].name:  ruby
48 | feed.tags[2].name:  java
49 | feed.tags[3].name:  rails
50 | feed.tags[4].name:  invokedynamic
51 | feed.tags[5].name:  jvm
52 | feed.tags[6].name:  application bundle
53 | feed.tags[7].name:  compilation
54 | feed.tags[8].name:  compiler
55 | feed.tags[9].name:  dynamic dispatch
56 | feed.tags[10].name: dynamic languages
57 | feed.tags[11].name: ruby 2.0
58 | 
59 | 
60 | feed.items[0].tags.size:      >>> 5
61 | feed.items[0].tags[0].name:   Arts/Music
62 | feed.items[0].tags[0].scheme: https://www.tbray.org/ongoing/What/
63 | feed.items[0].tags[1].name:   Arts
64 | feed.items[0].tags[2].name:   Music
65 | feed.items[0].tags[3].name:   Arts/Photos
66 | feed.items[0].tags[4].name:   Photos
67 | 


--------------------------------------------------------------------------------
/feeds/spec/json/example.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "https://jsonfeed.org/version/1",
 3 |     "title": "My Example Feed",
 4 |     "home_page_url": "https://example.org/",
 5 |     "feed_url": "https://example.org/feed.json",
 6 |     "items": [
 7 |         {
 8 |             "id": "2",
 9 |             "content_text": "This is a second item.",
10 |             "url": "https://example.org/second-item"
11 |         },
12 |         {
13 |             "id": "1",
14 |             "content_html": "<p>Hello, world!</p>",
15 |             "url": "https://example.org/initial-post"
16 |         }
17 |     ]
18 | }
19 | 
20 | ---
21 | 
22 | feed.format:     json
23 | feed.title:      My Example Feed
24 | feed.url:        https://example.org/
25 | feed.feed_url:   https://example.org/feed.json
26 | 
27 | feed.items.size:    >>> 2
28 | 
29 | feed.items[0].id:           2
30 | feed.items[0].content_text: This is a second item.
31 | feed.items[0].url:          https://example.org/second-item
32 | 
33 | 
34 | feed.items[1].id:            1
35 | feed.items[1].content_html:  <p>Hello, world!</p>
36 | feed.items[1].url:           https://example.org/initial-post
37 | 


--------------------------------------------------------------------------------
/feeds/spec/json/microblog.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "https://jsonfeed.org/version/1",
 3 |     "user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
 4 |     "title": "Brent Simmons’s Microblog",
 5 |     "home_page_url": "https://example.org/",
 6 |     "feed_url": "https://example.org/feed.json",
 7 |     "author": {
 8 |         "name": "Brent Simmons",
 9 |         "url": "http://example.org/",
10 |         "avatar": "https://example.org/avatar.png"
11 |     },
12 |     "items": [
13 |         {
14 |             "id": "2347259",
15 |             "url": "https://example.org/2347259",
16 |             "content_text": "Cats are neat. \n\nhttps://example.org/cats",
17 |             "date_published": "2016-02-09T14:22:00-07:00"
18 |         }
19 |     ]
20 | }
21 | 
22 | ---
23 | 
24 | feed.format:     json
25 | feed.title:      Brent Simmons’s Microblog
26 | feed.url:        https://example.org/
27 | feed.feed_url:   https://example.org/feed.json
28 | 
29 | feed.authors.size:      >>> 1
30 | feed.authors[0].name:   Brent Simmons
31 | feed.authors[0].url:    http://example.org/
32 | feed.authors[0].avatar: https://example.org/avatar.png
33 | 
34 | 
35 | feed.items.size:            >>> 1
36 | feed.items[0].id:           2347259
37 | feed.items[0].url:          https://example.org/2347259
38 | feed.items[0].content_text: Cats are neat. \n\nhttps://example.org/cats
39 | feed.items[0].published:       >>> DateTime.new( 2016, 2, 9, 14, 22, 0, '-7').utc
40 | feed.items[0].published_local: >>> DateTime.new( 2016, 2, 9, 14, 22, 0, '-7')
41 | 
42 | >>> pp feed.items[0].published
43 | >>> pp feed.items[0].published_local
44 | 


--------------------------------------------------------------------------------
/feeds/spec/json/tags.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "https://jsonfeed.org/version/1",
 3 |     "title": "My Example Feed",
 4 |     "home_page_url": "https://example.org/",
 5 |     "feed_url": "https://example.org/feed.json",
 6 |     "items": [
 7 |         {
 8 |             "id": "2",
 9 |             "content_text": "This is a second item.",
10 |             "url": "https://example.org/second-item",
11 |             "tags": ["one", "two", "fourty four"]
12 |         },
13 |         {
14 |             "id": "1",
15 |             "content_html": "<p>Hello, world!</p>",
16 |             "url": "https://example.org/initial-post",
17 |             "tags": ["compiler","dynamic dispatch","dynamic languages","ruby 2.0"]
18 |         }
19 |     ]
20 | }
21 | 
22 | ---
23 | 
24 | feed.items[0].tags.size:     >>> 3
25 | feed.items[0].tags[0].name: one
26 | feed.items[0].tags[1].name: two
27 | feed.items[0].tags[2].name: fourty four
28 | 
29 | feed.items[1].tags.size:     >>> 4
30 | feed.items[1].tags[0].name: compiler
31 | feed.items[1].tags[1].name: dynamic dispatch
32 | feed.items[1].tags[2].name: dynamic languages
33 | feed.items[1].tags[3].name: ruby 2.0
34 | 


--------------------------------------------------------------------------------
/feeds/spec/microformats/hentry.html:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | <article class="h-entry">
 4 |   <h1 class="p-name">Microformats are amazing</h1>
 5 |   <p>Published by
 6 |     <a class="p-author h-card" href="http://example.com">W. Developer</a>
 7 |      on <time class="dt-published" datetime="2013-06-13 12:00:00">13<sup>th</sup> June 2013</time>
 8 | 
 9 |   <p class="p-summary">In which I extoll the virtues of using microformats.</p>
10 | 
11 |   <div class="e-content">
12 |     <p>Blah blah blah</p>
13 |   </div>
14 | </article>
15 | 


--------------------------------------------------------------------------------
/feeds/spec/rss/author.rss:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <rss version="2.0">
 3 |   <channel>
 4 |     <title>Test Author</title>
 5 |     <link>http://example.com/</link>
 6 |     <description/>
 7 |     <lastBuildDate>Mon, 29 May 2017 18:51:30 GMT </lastBuildDate>
 8 |     <managingEditor>Matt Flegenheimer Jr.</managingEditor>
 9 |     <webMaster>Matt Flegenheimer Sr.</webMaster>
10 |     <item>
11 |       <title>Jared Kushner&#x2019;s Role Is Tested as Russia Case Grows</title>
12 |       <link>https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html?partner=rss&amp;emc=rss</link>
13 |       <guid isPermaLink="true">https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html</guid>
14 |       <pubDate>Mon, 29 May 2017 01:23:57 GMT</pubDate>
15 |       <author>Peter Baker</author>
16 |     </item>
17 |   </channel>
18 | </rss>
19 | 
20 | ---
21 | 
22 | feed.authors.size:      >>> 2
23 | feed.authors[0].text:   Matt Flegenheimer Jr.
24 | feed.authors[0].to_s:	  Matt Flegenheimer Jr.
25 | feed.authors[0].email:	>>> nil
26 | feed.authors[1].text:   Matt Flegenheimer Sr.
27 | feed.authors[1].to_s:	  Matt Flegenheimer Sr.
28 | 
29 | feed.author.text:  Matt Flegenheimer Jr.
30 | feed.author.to_s:  Matt Flegenheimer Jr.
31 | feed.author.email: >>> nil
32 | 
33 | 
34 | feed.items[0].authors.size:     >>> 1
35 | feed.items[0].authors[0].text:  Peter Baker
36 | feed.items[0].authors[0].to_s:	Peter Baker
37 | feed.items[0].authors[0].email:	>>> nil
38 | 
39 | feed.items[0].author.text:   Peter Baker
40 | feed.items[0].author.to_s: 	 Peter Baker
41 | feed.items[0].author.email:  >>> nil
42 | 


--------------------------------------------------------------------------------
/feeds/spec/rss/categories.rss:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <rss version="2.0">
 3 |   <channel>
 4 |     <title>Test Categories</title>
 5 |     <link>http://example.com</link>
 6 |     <description/>
 7 |     <lastBuildDate>Mon, 29 May 2017 18:51:30 GMT </lastBuildDate>
 8 |     <category domain="http://www.nytimes.com/namespaces/keywords/des">United States Politics and Government</category>
 9 |     <category domain="http://www.nytimes.com/namespaces/keywords/des">Social Media</category>
10 |     <category domain="http://www.nytimes.com/namespaces/keywords/mdes">News and News Media</category>
11 | 
12 |     <item>
13 |       <title>Jared Kushner&#x2019;s Role Is Tested as Russia Case Grows</title>
14 |       <link>https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html?partner=rss&amp;emc=rss</link>
15 |       <guid isPermaLink="true">https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html</guid>
16 |       <description>It is unclear how Jared Kushner&#x2019;s high-profile woes will affect his hard-won partnership with his father-in-law, perhaps the most stable in an often unstable White House.</description>
17 |       <pubDate>Mon, 29 May 2017 01:23:57 GMT</pubDate>
18 |       <category domain="http://www.nytimes.com/namespaces/keywords/mdes">Presidents and Presidency (US)</category>
19 |       <category domain="http://www.nytimes.com/namespaces/keywords/des">Appointments and Executive Changes</category>
20 |       <category domain="http://www.nytimes.com/namespaces/keywords/nyt_geo">United States</category>
21 |       <category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Jared Kushner</category>
22 |       <category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Donald J. Trump</category>
23 |     </item>
24 | 
25 |     <item>
26 |       <title>Reported Talks by Jared Kushner With Russia Would Be &#x2018;Good Thing,&#x2019; Trump Official Says</title>
27 |       <link>https://www.nytimes.com/2017/05/28/us/politics/trump-returns-to-us-and-to-berating-newsmedia-on-twitter.html?partner=rss&amp;emc=rss</link>
28 |       <guid isPermaLink="true">https://www.nytimes.com/2017/05/28/us/politics/trump-returns-to-us-and-to-berating-newsmedia-on-twitter.html</guid>
29 |       <description>John F. Kelly, the homeland security secretary, played down reports of a back channel. The president, meanwhile, resumed using Twitter as a weapon.</description>
30 |       <pubDate>Sun, 28 May 2017 16:57:05 GMT</pubDate>
31 |       <category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Trump, Donald J</category>
32 |       <category domain="http://www.nytimes.com/namespaces/keywords/des">United States Politics and Government</category>
33 |       <category domain="http://www.nytimes.com/namespaces/keywords/des">Social Media</category>
34 |       <category domain="http://www.nytimes.com/namespaces/keywords/mdes">News and News Media</category>
35 |       <category domain="http://www.nytimes.com/namespaces/keywords/nyt_org_all">Twitter</category>
36 |     </item>
37 |   </channel>
38 | </rss>
39 | 
40 | ---
41 | 
42 | feed.tags.size:      >>> 3
43 | feed.tags[0].name:   United States Politics and Government
44 | feed.tags[0].domain: http://www.nytimes.com/namespaces/keywords/des
45 | feed.tags[1].name:   Social Media
46 | feed.tags[2].name:   News and News Media
47 | 
48 | 
49 | feed.items[0].tags.size:      >>> 5
50 | feed.items[0].tags[0].name:   Presidents and Presidency (US)
51 | feed.items[0].tags[0].domain: http://www.nytimes.com/namespaces/keywords/mdes
52 | feed.items[0].tags[1].name:   Appointments and Executive Changes
53 | feed.items[0].tags[2].name:   United States
54 | feed.items[0].tags[3].name:   Jared Kushner
55 | feed.items[0].tags[4].name:   Donald J. Trump
56 | 
57 | 
58 | feed.items[1].tags.size:      >>> 5
59 | feed.items[1].tags[0].name:   Trump, Donald J
60 | feed.items[1].tags[0].domain: http://www.nytimes.com/namespaces/keywords/nyt_per
61 | feed.items[1].tags[1].name:   United States Politics and Government
62 | feed.items[1].tags[2].name:   Social Media
63 | feed.items[1].tags[3].name:   News and News Media
64 | feed.items[1].tags[4].name:   Twitter
65 | 


--------------------------------------------------------------------------------
/feeds/spec/rss/creator.rss:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
 3 |   <channel>
 4 |     <title>Test Dublin Core</title>
 5 |     <link>http://example.com/</link>
 6 |     <description/>
 7 |     <lastBuildDate>Mon, 29 May 2017 18:51:30 GMT </lastBuildDate>
 8 |     <dc:creator>Matt Flegenheimer</dc:creator>
 9 |     <item>
10 |       <title>Jared Kushner&#x2019;s Role Is Tested as Russia Case Grows</title>
11 |       <link>https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html?partner=rss&amp;emc=rss</link>
12 |       <guid isPermaLink="true">https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html</guid>
13 |       <pubDate>Mon, 29 May 2017 01:23:57 GMT</pubDate>
14 |       <dc:creator>Peter Baker</dc:creator>
15 |     </item>
16 |   </channel>
17 | </rss>
18 | 
19 | ---
20 | 
21 | feed.authors.size:      >>> 1
22 | feed.authors[0].text:   Matt Flegenheimer
23 | feed.authors[0].to_s:	  Matt Flegenheimer
24 | feed.authors[0].email:	>>> nil
25 | 
26 | feed.author.text:  Matt Flegenheimer
27 | feed.author.to_s:  Matt Flegenheimer
28 | feed.author.email: >>> nil
29 | 
30 | 
31 | feed.items[0].authors.size:     >>> 1
32 | feed.items[0].authors[0].text:  Peter Baker
33 | feed.items[0].authors[0].to_s:	Peter Baker
34 | feed.items[0].authors[0].email:	>>> nil
35 | 
36 | feed.items[0].author.text:   Peter Baker
37 | feed.items[0].author.to_s: 	 Peter Baker
38 | feed.items[0].author.email:  >>> nil
39 | 


--------------------------------------------------------------------------------
/feeds/test/helper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | require 'logutils'
  5 | require 'textutils'
  6 | 
  7 | # note: for now use "packaged" version in gem
  8 | #  (not the source in ../feedparser/lib)
  9 | require 'feedparser'
 10 | 
 11 | # note: add microformats support (optional)
 12 | require 'microformats'
 13 | 
 14 | 
 15 | 
 16 | require 'minitest/autorun'
 17 | 
 18 | 
 19 | LogUtils::Logger.root.level = :debug
 20 | 
 21 | 
 22 | 
 23 | def walk(root, &block)
 24 |   Dir.foreach(root) do |name|
 25 |     ## puts "name: #{name}"
 26 |     path = File.join(root, name)
 27 | 
 28 |     if name == '.' || name == '..'
 29 |       next
 30 |     elsif File.directory?( path )
 31 | 
 32 |       ## note: skip .git !!
 33 |       ##        test folder with ruby test scripts
 34 |       next if ['.git', 'test'].include?( name )
 35 | 
 36 |       puts "** directory: #{path}/"
 37 |       walk( path, &block )
 38 |     else
 39 |       puts "  #{name}"
 40 |       block.call( path )   ## same as yield( path )
 41 |     end
 42 |   end
 43 | end
 44 | 
 45 | 
 46 | ## add custom assert
 47 | module MiniTest
 48 | class Test
 49 | 
 50 | 
 51 |   ## note:
 52 |   ##   regex excape  bracket: [ to \[
 53 |   ##   \\ needs to get escaped twice e.g. (\\ becomes \)
 54 |   TXT_BEGIN  = "\\[\\["
 55 |   TXT_END    = "\\]\\]"
 56 | 
 57 | 
 58 |   def assert_feed( text, tests, opts={} )
 59 | 
 60 | 
 61 |     name =  opts[:name] || '<unknown>'
 62 | 
 63 | 
 64 |     feed = FeedParser::Parser.parse( text )
 65 | 
 66 |     ##################################################
 67 |     ## pass 1: remove blank lines & comment lines
 68 | 
 69 |     lines = []
 70 | 
 71 |     tests.each_line do |line|
 72 |       line = line.strip
 73 | 
 74 |       if line.start_with? '#'
 75 |         next     ## skip comment lines too
 76 |       end
 77 | 
 78 | 
 79 |       if line == '__END__'
 80 |         break    ## support end of file marker (skip/ignore all lines after __END__)
 81 |       end
 82 | 
 83 |       lines << line
 84 |     end
 85 | 
 86 | 
 87 |     #########################################
 88 |     ## pass 2: "fold" multi-line items
 89 |     ##  e.g.
 90 |     ##  feed.items[0].description: [[
 91 |     ##     In the United States, the social media giant has been an advocate of equal treatment of all Internet content.
 92 | 	  ##     In India, regulators who share that belief have effectively blocked a free Facebook service.
 93 |     ##  ]]
 94 |     ##    becomes =>:
 95 |     ##  feed.items[0].description: In the United States, the social media giant has been an advocate of equal treatment of all Internet content. In India, regulators who share that belief have effectively blocked a free Facebook service.
 96 |     ##
 97 | 
 98 |     ##
 99 |     ##  use [[> (instead of just [[)  to mark string as to preserve newlines
100 |     ##   or [[|  |]] (two brackets with pipe??) or [[[ ]]] (three brackets)  - why? why not?
101 |     ##  or use python style """ and """" - why? why not?
102 | 
103 | 
104 |     #######
105 |     ##  note: preserve blank lines in multi-line "verbatim" items
106 |     ##
107 | 
108 |     lines_ii = []
109 |     buf = ''
110 |     inside_txt = false
111 | 
112 |     lines.each do |line|
113 | 
114 |       if inside_txt == false
115 | 
116 |         if line =~ /#{TXT_BEGIN}/
117 |           s = StringScanner.new( line )
118 |           expr  = s.scan_until( /(?=#{TXT_BEGIN})/ )
119 |           _     = s.scan( /#{TXT_BEGIN}/ )
120 |           value = s.rest
121 | 
122 |           buf = ''   # reset
123 |           buf << expr.strip    # add expresion before TXT_BEGIN
124 | 
125 |           if value.nil? || value.strip.empty?
126 |             # add nothing ;-)
127 |           else
128 |             buf << ' '
129 |             buf << value.strip
130 |           end
131 |           inside_txt = true
132 |         else
133 |            if line =~ /^[ \t]*$/
134 |              next        ## skip blank lines (NOT in "verbatim" multi-line string blocks)
135 |            end
136 | 
137 |            lines_ii << line    # copy as is 1:1
138 |         end
139 |       else   ## inside_txt == true
140 |         if line =~ /#{TXT_END}/
141 |           s = StringScanner.new( line )
142 |           value = s.scan_until( /(?=#{TXT_END})/ )
143 |           _     = s.scan( /#{TXT_END}/ )
144 |           _     = s.rest
145 | 
146 |           if value.strip.empty?
147 |              # add nothing ;-)
148 |           else
149 |              buf << ' '
150 |              buf << value.strip
151 |           end
152 |           lines_ii << buf   ## add "folded" line
153 |           inside_txt = false
154 |         else
155 |           if line.strip.empty?
156 |             ##  empty lines get skipped for now => add support for mode with preserved newlines why? why not???
157 |           else
158 |             buf << " "   ## note: newline converter to just one space
159 |             buf << line.strip
160 |           end
161 |         end
162 |       end
163 |     end  # each lines
164 | 
165 | 
166 | 
167 |     #########################################
168 |     ## pass 3: eval asserts, finally ;-)
169 | 
170 |     lines_ii.each do |line|
171 | 
172 |       if line.start_with? '>>>'
173 |         ## for debugging allow "custom" code e.g. >>> pp feed.items[0].summary etc.
174 |         code = line[3..-1].strip
175 |         msg  = "eval in #{name}: >>> #{code}"
176 |       else
177 |         pos = line.index(':')   ## assume first colon (:) is separator
178 |         expr  = line[0...pos].strip    ## NOTE: do NOT include colon (thus, use tree dots ...)
179 |         value = line[pos+1..-1].strip
180 | 
181 |         ##  for ruby code use |>  or >> or >>> or =>  or $ or | or RUN or  ????
182 |         ##   otherwise assume "literal" string
183 | 
184 |         if value.start_with? '>>>'
185 |            value = value[3..-1].strip
186 |            msg   = "assert in #{name}: >>> #{expr}  ==  #{value}"
187 | 
188 |           if value == 'nil'
189 |              code = "assert_nil #{expr}, %{#{msg}}"  ## note: use assert_nil for nils
190 |           else
191 |              code = "assert_equal #{value}, #{expr}, %{#{msg}}"
192 |           end
193 |         else # assume value is a "plain" string
194 |           ## note use %{ } so value can include quotes ('') etc.
195 |           msg  = %{assert in #{name}: #{expr}  ==  "#{value}"}
196 |           code = "assert_equal %{#{value}}, #{expr}, %{#{msg}}"
197 |         end
198 |       end
199 | 
200 |       puts msg
201 |       eval( code )
202 |     end  # each line
203 |   end
204 | 
205 | end
206 | end # module MiniTest
207 | 


--------------------------------------------------------------------------------
/feeds/test/test_feeds.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./test test/test_feeds.rb
 6 | #  or better
 7 | #     rake test
 8 | 
 9 | require  'helper'
10 | 
11 | 
## Runs the asserts embedded in all feed samples found below the current directory.
##
##  note: inherit from Minitest::Test (the canonical name);
##    the old MiniTest spelling is only an alias on older minitest versions
##    (newer versions define it only with the MT_COMPAT env variable set)
class TestFeeds < Minitest::Test

  ## file extensions (incl. leading dot) of feed samples
  ##   everything else (README, Rakefile etc.) gets skipped
  FEED_EXTNAMES = ['.json', '.html', '.xml', '.rss', '.rss2', '.atom']


  def test_all
    walk_feeds( '.' )

    ## parse_feeds( './news/guardian*' )
    ## parse_feeds( './news/nytimes-blogs*' )
  end  # method test_all



private

  ## check all feed samples (by file extension) in root and its subfolders
  def walk_feeds( root='.' )
    walk( root ) do |path|
      ## note: extname includes dot e.g. .json etc.
      next unless FEED_EXTNAMES.include?( File.extname( path ) )

      parse_feed( path )
    end
  end # walk_feeds


  ## check all feed samples matching the glob pattern e.g. ./news/guardian*
  def parse_feeds( pattern )
    Dir.glob( pattern ).each do |path|
      puts "  #{path}"
      parse_feed( path )
    end
  end


  ## read one feed sample; split it into its blocks and run the asserts (if any)
  def parse_feed( path )
    puts "  reading #{path} ..."

    b = BlockReader.from_file( path ).read

    ## puts "  [debug] block.size: #{b.size}"
    text  = b[0]   ## block I: feed source text (xml, json, html, etc.)
    tests = b[1]   ## block II: test assert source

    if tests.nil?
      ## note: warn only (do NOT fail) on feed samples without asserts
      puts "!!!! test asserts missing in #{path} !!!"
      ## exit 1
    else
      assert_feed( text, tests, name: path )
    end
  end

end # class TestFeeds
65 | 


--------------------------------------------------------------------------------
/feedtxt.specs/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | {% include header.html %}
  3 | 
  4 | <!--
  5 |    change github.html to header.html - why? why not? 
  6 |   -->
  7 | 
  8 | # Feed.TXT - A Free Feeds Format in Plain Text w/ Structured Meta Data
  9 | 
 10 | 
 11 | What's Feed.TXT? Let's start with an example from the JSON Feed spec:
 12 | 
 13 | ```json
 14 | {
 15 |     "version": "https://jsonfeed.org/version/1",
 16 |     "title": "My Example Feed",
 17 |     "home_page_url": "https://example.org/",
 18 |     "feed_url": "https://example.org/feed.json",
 19 |     "items": [
 20 |         {
 21 |             "id": "2",
 22 |             "content_text": "This is a second item.",
 23 |             "url": "https://example.org/second-item"
 24 |         },
 25 |         {
 26 |             "id": "1",
 27 |             "content_html": "<p>Hello, world!</p>",
 28 |             "url": "https://example.org/initial-post"
 29 |         }
 30 |     ]
 31 | }
 32 | ```
 33 | 
 34 | Simple, isn't it? Let's try just text:
 35 | 
 36 | ```
 37 | |>>>
 38 |  title:          My Example Feed
 39 |  home_page_url:  https://example.org/
 40 |  feed_url:       https://example.org/feed.txt
 41 |  </>
 42 |  id:  2
 43 |  url: https://example.org/second-item
 44 |  ---
 45 |  This is a second item.
 46 |  </>
 47 |  id:  1
 48 |  url: https://example.org/initial-post
 49 |  ---
 50 |  Hello, world!
 51 | <<<| 
 52 | ```
 53 | 
 54 | Are you serious, really? Let's try another example from the JSON Feed spec:
 55 | 
 56 | ```json
 57 | {
 58 |     "version": "https://jsonfeed.org/version/1",
 59 |     "user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
 60 |     "title": "The Record",
 61 |     "home_page_url": "http://therecord.co/",
 62 |     "feed_url": "http://therecord.co/feed.json",
 63 |     "items": [
 64 |         {
 65 |             "id": "http://therecord.co/chris-parrish",
 66 |             "title": "Special #1 - Chris Parrish",
 67 |             "url": "http://therecord.co/chris-parrish",
 68 |             "content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
 69 |             "content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
 70 |             "summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
 71 |             "date_published": "2014-05-09T14:04:00-07:00",
 72 |             "attachments": [
 73 |                 {
 74 |                     "url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
 75 |                     "mime_type": "audio/x-m4a",
 76 |                     "size_in_bytes": 89970236,
 77 |                     "duration_in_seconds": 6629
 78 |                 }
 79 |             ]
 80 |         }
 81 |     ]
 82 | }
 83 | ```
 84 | 
 85 | Yes, the world's 1st podcasting feed in plain text ;-) Let's try:
 86 | 
 87 | ```
 88 | |>>>
 89 |  comment: This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json
 90 |  title:   The Record
 91 |  home_page_url: http://therecord.co/
 92 |  feed_url:      http://therecord.co/feed.txt
 93 |  </>
 94 |  id:        http://therecord.co/chris-parrish
 95 |  title:     Special #1 - Chris Parrish
 96 |  url:       http://therecord.co/chris-parrish
 97 |  summary:   Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.
 98 |  published: 2014-05-09T14:04:00-07:00
 99 |  attachments:
100 |  - url:           http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a
101 |    mime_type:     audio/x-m4a
102 |    size_in_bytes: 89970236
103 |    duration_in_seconds: 6629
104 |  ---
105 |  Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. 
106 |  Chris's new company is Aged & Distilled with Guy English — which shipped [Napkin][2], 
107 |  a Mac app for visual collaboration. Chris is also the co-host of The Record. 
108 |  He lives on [Bainbridge Island][3], a quick ferry ride from Seattle.
109 |  
110 |  [1]: http://adobe.com/
111 |  [2]: http://aged-and-distilled.com/napkin/
112 |  [3]: http://www.ci.bainbridge-isl.wa.us/
113 | <<<|  
114 | ```
115 | 
116 | 
117 | ## Spec(ification) - How does it work?
118 | 
119 | A Feed.txt starts with a meta data block for the feed in YAML format
120 | followed by a list of items. Items start with a meta data block followed by the text
121 | using the markdown formatting conventions for structured text (headings, lists, tables, etc.) and
122 | hyperlinks. That's it. 
123 | 
124 | 
125 | ### Dividers - Begin / Next / End
126 | 
127 | Use `|>>>` to begin a Feed.txt feed. Note you can use three or more `>>>` opening brackets e.g.
128 | `|>>>>>>>>>>>>` also works.
129 | 
130 | Use `<<<|` to end a Feed.txt feed. Again note you can use three or more `<<<` closing brackets e.g.
131 | `<<<<<<<|` also works.
132 | 
133 | Use `</>` to break up items. That's it.
134 | 
135 | 
136 | 
137 | 
138 | ## Use JSON / JSON5 / HJSON / SON for Structured Meta Data - |{  }|
139 | 
140 | As an alternative you can use human JSON for meta data blocks. Let's try:
141 | 
142 | ```
143 | |{
144 |  title:          "My Example Feed"
145 |  home_page_url:  "https://example.org/"
146 |  feed_url:       "https://example.org/feed.txt"
147 |  }/{
148 |  id:  "2"
149 |  url: "https://example.org/second-item"
150 |  }
151 |  This is a second item.
152 |  }/{
153 |  id:  "1"
154 |  url: "https://example.org/initial-post"
155 |  }
156 |  Hello, world!
157 | }| 
158 | ```
159 | 
160 | Are you joking? Don't like the more human JSON style? Let's retry in "classic" JSON:
161 | 
162 | ```
163 | |{
164 |  "title":          "My Example Feed",
165 |  "home_page_url":  "https://example.org/",
166 |  "feed_url":       "https://example.org/feed.txt"
167 |  }/{
168 |  "id":  "2",
169 |  "url": "https://example.org/second-item"
170 |  }
171 |  This is a second item.
172 |  }/{
173 |  "id":  "1",
174 |  "url": "https://example.org/initial-post"
175 |  }
176 |  Hello, world!
177 | }| 
178 | ```
179 | 
180 | ### Dividers - Begin / Next / End   (JSON Edition)
181 | 
182 | Change `|>>>` to `|{` to begin a Feed.txt feed. Note you can use one or more `{` opening curly brackets e.g. `|{%raw%}{{{{{%endraw%}` also works.
183 | 
184 | Change `<<<|` to `}|` to end a Feed.txt feed. Again note you can use one or more `}` closing brackets e.g. `{%raw%}}}}}{%endraw%}|` also works.
185 | 
186 | Change `</>` to `}/{` to break up items. That's it.
187 | 
188 | 
189 | Sorry, there's no XML alternative ;-)
190 | 
191 | 
192 | ## License
193 | 
194 | The Feed.TXT format and conventions are dedicated to the public domain.
195 | Use it as you please with no restrictions whatsoever.
196 | 
197 | ## Questions? Comments?
198 | 
199 | Send them along to the [wwwmake mailing list/forum](http://groups.google.com/group/wwwmake). Thanks.
200 | 
201 | 
202 | <!-- todo: move footer to layouts -->
203 | 
204 | Brought to you by [Manuscripts](https://github.com/manuscripts) and friends. You might also like [Bib.TXT](http://bibtxt.github.io) ;-).
205 | 
206 | 
207 | 
208 | 
209 | 


--------------------------------------------------------------------------------
/feedtxt.specs/_includes/header.html:
--------------------------------------------------------------------------------
 1 | <div style="margin-top: 12px !important;">
 2 |   <a href="https://github.com/feedtxt">Feed.TXT @ GitHub</a>
 3 | </div>
 4 |   
 5 | <!--
 6 |    try adding github star button 
 7 |   -->
 8 | 
 9 |  <!--
10 | <a class="github-button" href="https://github.com/feedtxt/feedtxt.github.io"
11 |     data-icon="octicon-star"
12 |     data-count-href="/feedtxt/feedtxt.github.io/stargazers"
13 |     data-count-api="/repos/feedtxt/feedtxt.github.io#stargazers_count"
14 |     data-count-aria-label="stargazers on GitHub"
15 |     aria-label="Star feedtxt/feedtxt.github.io on GitHub">Star</a>
16 |   -->
17 |   
18 | <!-- GitHub Buttons
19 |    Place this tag right after the last button or just before your close body tag.
20 |     see https://buttons.github.io
21 |   -->
22 |   <!--
23 | <script async defer id="github-bjs" src="https://buttons.github.io/buttons.js"></script>  
24 |  -->
25 | 


--------------------------------------------------------------------------------
/feedtxt/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /spec/examples.txt
 9 | /test/tmp/
10 | /test/version_tmp/
11 | /tmp/
12 | 
13 | # Used by dotenv library to load environment variables.
14 | # .env
15 | 
16 | ## Specific to RubyMotion:
17 | .dat*
18 | .repl_history
19 | build/
20 | *.bridgesupport
21 | build-iPhoneOS/
22 | build-iPhoneSimulator/
23 | 
24 | ## Specific to RubyMotion (use of CocoaPods):
25 | #
26 | # We recommend against adding the Pods directory to your .gitignore. However
27 | # you should judge for yourself, the pros and cons are mentioned at:
28 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29 | #
30 | # vendor/Pods/
31 | 
32 | ## Documentation cache and generated files:
33 | /.yardoc/
34 | /_yardoc/
35 | /doc/
36 | /rdoc/
37 | 
38 | ## Environment normalization:
39 | /.bundle/
40 | /vendor/bundle
41 | /lib/bundler/man/
42 | 
43 | # for a library or gem, you might want to ignore these files since the code is
44 | # intended to run in multiple environments; otherwise, check them in:
45 | # Gemfile.lock
46 | # .ruby-version
47 | # .ruby-gemset
48 | 
49 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50 | .rvmrc
51 | 


--------------------------------------------------------------------------------
/feedtxt/HISTORY.md:
--------------------------------------------------------------------------------
1 | 
2 | ### 0.0.1 / 2017-06-11
3 | 
4 | * Everything is new. First release
5 | 


--------------------------------------------------------------------------------
/feedtxt/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | lib/feedtxt.rb
 6 | lib/feedtxt/parser.rb
 7 | lib/feedtxt/parser/ini.rb
 8 | lib/feedtxt/parser/json.rb
 9 | lib/feedtxt/parser/yaml.rb
10 | lib/feedtxt/version.rb
11 | test/feeds/spec/example.ini.txt
12 | test/feeds/spec/example.json.txt
13 | test/feeds/spec/example.yaml.txt
14 | test/feeds/spec/podcast.ini.txt
15 | test/feeds/spec/podcast.json.txt
16 | test/feeds/spec/podcast.yaml.txt
17 | test/helper.rb
18 | test/test_ini.rb
19 | test/test_json.rb
20 | test/test_scanner.rb
21 | test/test_yaml.rb
22 | 


--------------------------------------------------------------------------------
/feedtxt/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/feedtxt/version.rb'
 3 | 
 4 | Hoe.spec 'feedtxt' do
 5 | 
 6 |   self.version = Feedtxt::VERSION
 7 | 
 8 |   self.summary = "feedtxt - reads Feed.TXT a.k.a. RSS (Really Simple Sharing) 5.0 ;-) - feeds in text (unicode) - publish & share posts, articles, podcasts, 'n' more"
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/feedtxt/feedtxt']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'wwwmake@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for gihub formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.licenses = ['Public Domain']
21 | 
22 |   ### todo
23 |   ##   add deps e.g. props gem for INI.load
24 | 
25 | 
26 |   self.spec_extras = {
27 |     required_ruby_version: '>= 1.9.2'
28 |   }
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/feedtxt/lib/feedtxt.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | # core and stdlibs
 5 | 
 6 | require 'strscan'    ## StringScanner
 7 | require 'json'
 8 | require 'yaml'
 9 | require 'date'
10 | require 'time'
11 | require 'pp'
12 | 
13 | 
14 | 
15 | # 3rd party gems/libs
16 | require 'logutils'
17 | require 'props'     ## used for IniFile.parse
18 | 
19 | 
20 | # our own code
21 | require 'feedtxt/version'  # let it always go first
22 | require 'feedtxt/parser'
23 | require 'feedtxt/parser/json'
24 | require 'feedtxt/parser/yaml'
25 | require 'feedtxt/parser/ini'
26 | 
27 | 
28 | 
29 | ##  add shortcut / alias e.g.
30 | ##   lets you use:
31 | ##    Feedtxt.parse  instead of Feedtxt::Parser.parse
32 | module Feedtxt
33 |   def self.parse( text, opts={} )
34 |     Parser.parse( text,  )
35 |   end
36 | 
37 |   INI  = IniParser     ## note: add a shortcut; lets you use Feedtxt::INI.parse
38 |   YAML = YamlParser    ## note: add a shortcut; lets you use Feedtxt::YAML.parse
39 |   JSON = JsonParser    ## note: add a shortcut; lets you use Feedttxt::JSON.parse
40 | end
41 | 
42 | 
43 | 
44 | # say hello - print the version banner if $DEBUG or $RUBYLIBS_DEBUG is set
45 | puts Feedtxt.banner     if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
46 | 


--------------------------------------------------------------------------------
/feedtxt/lib/feedtxt/parser.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Feedtxt
 4 | 
 5 | 
 6 | class Parser
 7 | 
 8 |   include LogUtils::Logging
 9 | 
10 | 
11 |   ### convenience class/factory method
12 |   def self.parse( text, opts={} )
13 |     self.new( text ).parse
14 |   end
15 | 
16 |   ### Note: lets keep/use same API as RSS::Parser for now
17 |   def initialize( text )
18 |     @text = text
19 |   end
20 | 
21 | 
22 |   def parse
23 |     ## auto-detect format
24 |     ##   use "best" matching format (e.g. first match by pos(ition))
25 | 
26 |     klass = YAML           ## default to yamlparser for now
27 |     pos   = 9_999_999     ## todo:use  MAX INTEGER or something!!
28 | 
29 |     json = @text.index( /#{JSON::FEED_BEGIN}/ )
30 |     if json    # found e.g. not nil? incl. 0
31 |       pos   = json
32 |       klass = JSON
33 |     end
34 | 
35 |     ini = @text.index( /#{INI::FEED_BEGIN}/ )
36 |     if ini && ini < pos  # found e.g. not nil? and match before last?
37 |       pos   = ini
38 |       klass = INI
39 |     end
40 | 
41 |     yaml = @text.index( /#{YAML::FEED_BEGIN}/ )
42 |     if yaml && yaml < pos  # found e.g. not nil? and match before last?
43 |       pos   = yaml
44 |       klass = YAML
45 |     end
46 | 
47 |     feed = klass.parse( @text )
48 |     feed
49 |   end # method parse
50 | 
51 | end  # class Parser
52 | 
53 | end # module Feedtxt
54 | 


--------------------------------------------------------------------------------
/feedtxt/lib/feedtxt/parser/ini.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module Feedtxt
  4 | 
  5 | 
  6 | class IniParser
  7 | 
  8 |   include LogUtils::Logging
  9 | 
 10 | 
 11 |   ### convenience class/factory method
 12 |   def self.parse( text, opts={} )
 13 |     self.new( text ).parse
 14 |   end
 15 | 
 16 |   ### Note: lets keep/use same API as RSS::Parser for now
 17 |   def initialize( text )
 18 |     @text = text
 19 |   end
 20 | 
 21 | 
 22 | 
 23 |   ## note:
 24 |   ##   regex excape  bracket: [ to \[
 25 |   ##   \\ needs to get escaped twice e.g. (\\ becomes \)
 26 |   ##  e.g. [>>>  or [>>>>>
 27 |   FEED_BEGIN = "^[ ]*\\[>>>+[ ]*$"    ## note: allow leading n trailing spaces; allow 3 or more brackets
 28 |   ##  e.g. <<<] or <<<<<<]
 29 |   FEED_END   = "^[ ]*<<<+\\][ ]*$"    ## note: allow leading n trailing spaces; allow 3 or more brackets
 30 | 
 31 |   ## e.g.</>  or <<</>>>
 32 |   FEED_NEXT  = "^[ ]*<+/>+[ ]*$"       ## pass 1: split/break up blocks
 33 |   ## e.g. --- or -----
 34 |   FEED_META  = "^[ ]*---+[ ]*$"       ## pass 2: break up item into metadata and content block
 35 | 
 36 | 
 37 | 
 38 |   def parse
 39 | 
 40 |     ## find start marker e.g. [>>>
 41 |     ##    use regex - allow three or more >>>>>> or <<<<<<
 42 |     ##    allow spaces before and after
 43 | 
 44 |     s = StringScanner.new( @text )
 45 | 
 46 |     prolog = s.scan_until( /(?=#{FEED_BEGIN})/ )
 47 |     ## pp prolog
 48 | 
 49 |     feed_begin = s.scan( /#{FEED_BEGIN}/ )
 50 |     if feed_begin.empty?    ## use blank? why? why not??
 51 |       ## nothing found return empty array for now; return nil - why? why not?
 52 |       puts "warn !!! no begin marker found e.g. |>>>"
 53 |       return []
 54 |     end
 55 | 
 56 | 
 57 |     buf =  s.scan_until( /(?=#{FEED_END})/ )
 58 |     buf = buf.strip    # remove leading and trailing whitespace
 59 | 
 60 |     feed_end = s.scan( /#{FEED_END}/ )
 61 |     if feed_end.empty?   ## use blank? why? why not??
 62 |       ## nothing found return empty array for now; return nil - why? why not?
 63 |       puts "warn !!! no end marker found e.g. <<<|"
 64 |       return []
 65 |     end
 66 | 
 67 | 
 68 |     ####
 69 |     ## pass 1: split blocks by </>
 70 |     ###    note: allows   <<<</>>>>
 71 | 
 72 |     blocks = buf.split( /#{FEED_NEXT}/ )
 73 |     ## pp blocks
 74 | 
 75 |     ## 1st block is feed meta data
 76 |     block1st = blocks.shift       ## get/remove 1st block from blocks
 77 |     block1st = block1st.strip     ## strip leading and trailing whitespace
 78 |     feed_metadata = ::INI.load( block1st )
 79 | 
 80 |     feed_items = []
 81 |     blocks.each do |block|
 82 |       ###   note: do NOT use split e.g.--- is used by markdown
 83 |       ##      only search for first --- to split (all others get ignored)
 84 |       ##    todo: make three dashes --- (3) not hard-coded (allow more)
 85 | 
 86 |       s2 = StringScanner.new( block )
 87 | 
 88 |       item_metadata = s2.scan_until( /(?=#{FEED_META})/ )
 89 |       item_metadata = item_metadata.strip    # remove leading and trailing whitespace
 90 |       item_metadata = ::INI.load( item_metadata )   ## convert to hash with inifile parser
 91 | 
 92 |       feed_meta = s2.scan( /#{FEED_META}/ )
 93 | 
 94 |       item_content = s2.rest
 95 |       item_content = item_content.strip     # remove leading and trailing whitespace
 96 | 
 97 |       feed_items << [item_metadata, item_content]
 98 |     end
 99 | 
100 |     [ feed_metadata, feed_items ]
101 |   end # method parse
102 | 
103 | 
104 | end  # class IniParser
105 | 
106 | 
107 | end # module Feedtxt
108 | 


--------------------------------------------------------------------------------
/feedtxt/lib/feedtxt/parser/json.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module Feedtxt
  4 | 
  5 | 
  6 | class JsonParser
  7 | 
  8 |   include LogUtils::Logging
  9 | 
 10 | 
 11 |   ### convenience class/factory method
 12 |   def self.parse( text, opts={} )
 13 |     self.new( text ).parse
 14 |   end
 15 | 
 16 |   ### Note: lets keep/use same API as RSS::Parser for now
 17 |   def initialize( text )
 18 |     @text = text
 19 |   end
 20 | 
 21 | 
 22 | 
 23 |   ## note:
 24 |   ##   regex excape  pipe: | to \|
 25 |   ##   \\ needs to get escaped twice e.g. (\\ becomes \)
 26 |   ##  e.g. |{  or |{{{
 27 |   FEED_BEGIN = "^[ ]*\\|{+[ ]*$"    ## note: allow leading n trailing spaces; allow 3 or more brackets
 28 |   ##  e.g. }| or }}}|
 29 |   FEED_END   = "^[ ]*}+\\|[ ]*$"    ## note: allow leading n trailing spaces; allow 3 or more brackets
 30 | 
 31 |   ## e.g.}/{  or }}}/{{{
 32 |   ##   todo/check: also allow }///{ or } /// { why,why not?
 33 |   FEED_NEXT  = "^[ ]*}+/{+[ ]*$"       ## pass 1: split/break up blocks
 34 | 
 35 |   ## e.g. }---{ or }}}---{{{ or }-{
 36 |   ##   todo/check: also allow }.{ with dot why? why not?
 37 |   ##   also allow  } - { or } ---- { why? why not?
 38 |   FEED_META  = "^[ ]*}+-+{+[ ]*$"       ## pass 2: break up item into metadata and content block
 39 | 
 40 | 
 41 | 
 42 |   def parse
 43 | 
 44 |     ## find start marker e.g. |>>>
 45 |     ##    use regex - allow three or more >>>>>> or <<<<<<
 46 |     ##    allow spaces before and after
 47 | 
 48 |     s = StringScanner.new( @text )
 49 | 
 50 |     prolog = s.scan_until( /(?=#{FEED_BEGIN})/ )
 51 |     ## pp prolog
 52 | 
 53 |     feed_begin = s.scan( /#{FEED_BEGIN}/ )
 54 |     if feed_begin.empty?    ## use blank? why? why not??
 55 |       ## nothing found return empty array for now; return nil - why? why not?
 56 |       puts "warn !!! no begin marker found e.g. |>>>"
 57 |       return []
 58 |     end
 59 | 
 60 | 
 61 |     buf =  s.scan_until( /(?=#{FEED_END})/ )
 62 |     buf = buf.strip    # remove leading and trailing whitespace
 63 | 
 64 |     feed_end = s.scan( /#{FEED_END}/ )
 65 |     if feed_end.empty?   ## use blank? why? why not??
 66 |       ## nothing found return empty array for now; return nil - why? why not?
 67 |       puts "warn !!! no end marker found e.g. <<<|"
 68 |       return []
 69 |     end
 70 | 
 71 | 
 72 |     ####
 73 |     ## pass 1: split blocks by }/{
 74 |     ###    note: allows   }}}/{{{
 75 | 
 76 |     blocks = buf.split( /#{FEED_NEXT}/ )
 77 |     ## pp blocks
 78 | 
 79 |     ## 1st block is feed meta data
 80 |     block1st = blocks.shift       ## get/remove 1st block from blocks
 81 |     block1st = block1st.strip    # remove leading and trailing whitespaces
 82 |     feed_metadata = ::JSON.parse( "{ #{block1st} }" )
 83 | 
 84 |     feed_items = []
 85 |     blocks.each do |block|
 86 |       ###   note: do NOT use split e.g.--- is used by markdown
 87 |       ##      only search for first --- to split (all others get ignored)
 88 |       ##    todo: make three dashes --- (3) not hard-coded (allow more)
 89 | 
 90 |       s2 = StringScanner.new( block )
 91 | 
 92 |       item_metadata = s2.scan_until( /(?=#{FEED_META})/ )
 93 |       item_metadata = item_metadata.strip    # remove leading and trailing whitespace
 94 |       item_metadata = ::JSON.parse( "{ #{item_metadata} }" )   ## convert to hash with yaml
 95 | 
 96 |       feed_meta = s2.scan( /#{FEED_META}/ )
 97 | 
 98 |       item_content = s2.rest
 99 |       item_content = item_content.strip     # remove leading and trailing whitespace
100 | 
101 |       feed_items << [item_metadata, item_content]
102 |     end
103 | 
104 |     [ feed_metadata, feed_items ]
105 |   end # method parse
106 | 
107 | 
108 | end  # class JsonParser
109 | 
110 | end # module Feedtxt
111 | 


--------------------------------------------------------------------------------
/feedtxt/lib/feedtxt/parser/yaml.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module Feedtxt
  4 | 
  5 | 
  6 | class YamlParser
  7 | 
  8 |   include LogUtils::Logging
  9 | 
 10 | 
 11 |   ### convenience class/factory method
 12 |   def self.parse( text, opts={} )
 13 |     self.new( text ).parse
 14 |   end
 15 | 
 16 |   ### Note: lets keep/use same API as RSS::Parser for now
 17 |   def initialize( text )
 18 |     @text = text
 19 |   end
 20 | 
 21 | 
 22 | 
 23 |   ## note:
 24 |   ##   regex excape  pipe: | to \|
 25 |   ##   \\ needs to get escaped twice e.g. (\\ becomes \)
 26 |   ##  e.g. |>>>  or |>>>>>
 27 |   FEED_BEGIN = "^[ ]*\\|>>>+[ ]*$"    ## note: allow leading n trailing spaces; allow 3 or more brackets
 28 |   ##  e.g. <<<| or <<<<<<|
 29 |   FEED_END   = "^[ ]*<<<+\\|[ ]*$"    ## note: allow leading n trailing spaces; allow 3 or more brackets
 30 | 
 31 |   ## e.g.</>  or <<</>>>
 32 |   FEED_NEXT  = "^[ ]*<+/>+[ ]*$"       ## pass 1: split/break up blocks
 33 |   ## e.g. --- or -----
 34 |   FEED_META  = "^[ ]*---+[ ]*$"       ## pass 2: break up item into metadata and content block
 35 | 
 36 | 
 37 | 
 38 |   def parse
 39 | 
 40 |     ## find start marker e.g. |>>>
 41 |     ##    use regex - allow three or more >>>>>> or <<<<<<
 42 |     ##    allow spaces before and after
 43 | 
 44 |     s = StringScanner.new( @text )
 45 | 
 46 |     prolog = s.scan_until( /(?=#{FEED_BEGIN})/ )
 47 |     ## pp prolog
 48 | 
 49 |     feed_begin = s.scan( /#{FEED_BEGIN}/ )
 50 |     if feed_begin.empty?    ## use blank? why? why not??
 51 |       ## nothing found return empty array for now; return nil - why? why not?
 52 |       puts "warn !!! no begin marker found e.g. |>>>"
 53 |       return []
 54 |     end
 55 | 
 56 | 
 57 |     buf =  s.scan_until( /(?=#{FEED_END})/ )
 58 |     buf = buf.strip    # remove leading and trailing whitespace
 59 | 
 60 |     feed_end = s.scan( /#{FEED_END}/ )
 61 |     if feed_end.empty?   ## use blank? why? why not??
 62 |       ## nothing found return empty array for now; return nil - why? why not?
 63 |       puts "warn !!! no end marker found e.g. <<<|"
 64 |       return []
 65 |     end
 66 | 
 67 | 
 68 |     ####
 69 |     ## pass 1: split blocks by </>
 70 |     ###    note: allows   <<<</>>>>
 71 | 
 72 |     blocks = buf.split( /#{FEED_NEXT}/ )
 73 |     ## pp blocks
 74 | 
 75 |     ## 1st block is feed meta data
 76 |     block1st = blocks.shift       ## get/remove 1st block from blocks
 77 |     block1st = block1st.strip     ## strip leading and trailing whitespace
 78 |     feed_metadata = ::YAML.load( block1st )
 79 | 
 80 |     feed_items = []
 81 |     blocks.each do |block|
 82 |       ###   note: do NOT use split e.g.--- is used by markdown
 83 |       ##      only search for first --- to split (all others get ignored)
 84 |       ##    todo: make three dashes --- (3) not hard-coded (allow more)
 85 | 
 86 |       s2 = StringScanner.new( block )
 87 | 
 88 |       item_metadata = s2.scan_until( /(?=#{FEED_META})/ )
 89 |       item_metadata = item_metadata.strip    # remove leading and trailing whitespace
 90 |       item_metadata = ::YAML.load( item_metadata )   ## convert to hash with yaml
 91 | 
 92 |       feed_meta = s2.scan( /#{FEED_META}/ )
 93 | 
 94 |       item_content = s2.rest
 95 |       item_content = item_content.strip     # remove leading and trailing whitespace
 96 | 
 97 |       feed_items << [item_metadata, item_content]
 98 |     end
 99 | 
100 |     [ feed_metadata, feed_items ]
101 |   end # method parse
102 | 
103 | 
104 | end  # class YamlParser
105 | 
106 | end # module Feedtxt
107 | 


--------------------------------------------------------------------------------
/feedtxt/lib/feedtxt/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Feedtxt
 4 | 
 5 |   MAJOR = 1
 6 |   MINOR = 0
 7 |   PATCH = 1
 8 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
 9 | 
10 |   def self.version
11 |     VERSION
12 |   end
13 | 
14 |   def self.banner
15 |     "feedtxt/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16 |   end
17 | 
18 |   def self.root
19 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20 |   end
21 | 
22 | end # module Feedtxt
23 | 


--------------------------------------------------------------------------------
/feedtxt/test/feeds/spec/example.ini.txt:
--------------------------------------------------------------------------------
 1 | [>>>
 2 | title         = My Example Feed
 3 | home_page_url = https://example.org/
 4 | feed_url      = https://example.org/feed.txt
 5 | </>
 6 | id  = 2
 7 | url = https://example.org/second-item
 8 | ---
 9 | This is a second item.
10 | </>
11 | id  = 1
12 | url = https://example.org/initial-post
13 | ---
14 | Hello, world!
15 | <<<]
16 | 


--------------------------------------------------------------------------------
/feedtxt/test/feeds/spec/example.json.txt:
--------------------------------------------------------------------------------
 1 | |{
 2 | "title":          "My Example Feed",
 3 | "home_page_url":  "https://example.org/",
 4 | "feed_url":       "https://example.org/feed.txt"
 5 | }/{
 6 | "id":  "2",
 7 | "url": "https://example.org/second-item"
 8 | }-{
 9 | This is a second item.
10 | }/{
11 | "id":  "1",
12 | "url": "https://example.org/initial-post"
13 | }-{
14 | Hello, world!
15 | }|
16 | 


--------------------------------------------------------------------------------
/feedtxt/test/feeds/spec/example.yaml.txt:
--------------------------------------------------------------------------------
 1 | |>>>
 2 | title:          "My Example Feed"
 3 | home_page_url:  "https://example.org/"
 4 | feed_url:       "https://example.org/feed.txt"
 5 | </>
 6 | id:  "2"
 7 | url: "https://example.org/second-item"
 8 | ---
 9 | This is a second item.
10 | </>
11 | id:  "1"
12 | url: "https://example.org/initial-post"
13 | ---
14 | Hello, world!
15 | <<<|
16 | 


--------------------------------------------------------------------------------
/feedtxt/test/feeds/spec/podcast.ini.txt:
--------------------------------------------------------------------------------
 1 | [>>>
 2 | comment  =  This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json
 3 | title    =  The Record
 4 | home_page_url = http://therecord.co/
 5 | feed_url =      http://therecord.co/feed.txt
 6 | </>
 7 | id        = http://therecord.co/chris-parrish
 8 | title     = Special #1 - Chris Parrish
 9 | url       = http://therecord.co/chris-parrish
10 | summary   = Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.
11 | published = 2014-05-09T14:04:00-07:00
12 | [attachments]
13 |  url           = http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a
14 |  mime_type     = audio/x-m4a
15 |  size_in_bytes = 89970236
16 |  duration_in_seconds = 6629
17 | ---
18 | Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.
19 | Chris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),
20 | a Mac app for visual collaboration. Chris is also the co-host of The Record.
21 | He lives on [Bainbridge Island][3], a quick ferry ride from Seattle.
22 | 
23 | [1]: http://adobe.com/
24 | [2]: http://aged-and-distilled.com/napkin/
25 | [3]: http://www.ci.bainbridge-isl.wa.us/
26 | <<<]
27 | 


--------------------------------------------------------------------------------
/feedtxt/test/feeds/spec/podcast.json.txt:
--------------------------------------------------------------------------------
 1 | |{
 2 | "comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
 3 | "title":   "The Record",
 4 | "home_page_url": "http://therecord.co/",
 5 | "feed_url":      "http://therecord.co/feed.txt"
 6 | }/{
 7 | "id":        "http://therecord.co/chris-parrish",
 8 | "title":     "Special #1 - Chris Parrish",
 9 | "url":       "http://therecord.co/chris-parrish",
10 | "summary":   "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
11 | "published": "2014-05-09T14:04:00-07:00",
12 | "attachments": [
13 |   {
14 |    "url":           "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
15 |    "mime_type":     "audio/x-m4a",
16 |    "size_in_bytes": 89970236,
17 |    "duration_in_seconds": 6629
18 |   }
19 |  ]
20 | }-{
21 | Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.
22 | Chris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),
23 | a Mac app for visual collaboration. Chris is also the co-host of The Record.
24 | He lives on [Bainbridge Island][3], a quick ferry ride from Seattle.
25 | 
26 | [1]: http://adobe.com/
27 | [2]: http://aged-and-distilled.com/napkin/
28 | [3]: http://www.ci.bainbridge-isl.wa.us/
29 | }|
30 | 


--------------------------------------------------------------------------------
/feedtxt/test/feeds/spec/podcast.yaml.txt:
--------------------------------------------------------------------------------
 1 | |>>>
 2 | comment: "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json"
 3 | title:   "The Record"
 4 | home_page_url: "http://therecord.co/"
 5 | feed_url:      "http://therecord.co/feed.txt"
 6 | </>
 7 | id:        "http://therecord.co/chris-parrish"
 8 | title:     "Special #1 - Chris Parrish"
 9 | url:       "http://therecord.co/chris-parrish"
10 | summary:   "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled."
11 | published: 2014-05-09T14:04:00-07:00
12 | attachments:
13 | - url:           "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a"
14 |   mime_type:     "audio/x-m4a"
15 |   size_in_bytes: 89970236
16 |   duration_in_seconds: 6629
17 | ---
18 | Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.
19 | Chris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),
20 | a Mac app for visual collaboration. Chris is also the co-host of The Record.
21 | He lives on [Bainbridge Island][3], a quick ferry ride from Seattle.
22 | 
23 | [1]: http://adobe.com/
24 | [2]: http://aged-and-distilled.com/napkin/
25 | [3]: http://www.ci.bainbridge-isl.wa.us/
26 | <<<|
27 | 


--------------------------------------------------------------------------------
/feedtxt/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | 
 4 | ## minitest setup
 5 | 
 6 | require 'minitest/autorun'
 7 | 
 8 | require 'logutils'
 9 | require 'textutils'
10 | 
11 | 
12 | ## our own code
13 | require 'feedtxt'
14 | 
15 | 
16 | 
17 | LogUtils::Logger.root.level = :debug   ## set the root logger to debug level for the test runs
18 | 
19 | 
20 | def read_text( name )
21 |   text = File.read( "#{Feedtxt.root}/test/feeds/#{name}.txt" )
22 |   text
23 | end
24 | 


--------------------------------------------------------------------------------
/feedtxt/test/test_ini.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_ini.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | class TestIni < MiniTest::Test
11 | 
12 |   def test_example
13 | 
14 |     text = read_text( 'spec/example.ini' )
15 |     pp text
16 | 
17 |     exp = [
18 |       {"title"=>"My Example Feed",
19 |   "home_page_url"=>"https://example.org/",
20 |   "feed_url"=>"https://example.org/feed.txt"},
21 |     [[
22 |       {"id"=>"2", "url"=>"https://example.org/second-item"},
23 |        "This is a second item."
24 |      ],
25 |      [
26 |        {"id"=>"1", "url"=>"https://example.org/initial-post"},
27 |        "Hello, world!"
28 |     ]]]
29 | 
30 |     assert_equal exp, Feedtxt::INI.parse( text )
31 |     assert_equal exp, Feedtxt.parse( text )     ## try shortcut alias too
32 |   end
33 | 
34 |   def test_podcast
35 | 
36 |     text = read_text( 'spec/podcast.ini' )
37 |     pp text
38 | 
39 |     exp = [{"comment"=>
40 |    "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
41 |   "title"=>"The Record",
42 |   "home_page_url"=>"http://therecord.co/",
43 |   "feed_url"=>"http://therecord.co/feed.txt"},
44 |  [[{"id"=>"http://therecord.co/chris-parrish",
45 |     "title"=>"Special",
46 |     "url"=>"http://therecord.co/chris-parrish",
47 |     "summary"=>
48 |      "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
49 |     "published"=>"2014-05-09T14:04:00-07:00",
50 |     "attachments"=>
51 |      {"url"=>"http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
52 |       "mime_type"=>"audio/x-m4a",
53 |       "size_in_bytes"=>"89970236",
54 |       "duration_in_seconds"=>"6629"}},
55 |    "Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.\nChris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),\na Mac app for visual collaboration. Chris is also the co-host of The Record.\nHe lives on [Bainbridge Island][3], a quick ferry ride from Seattle.\n\n[1]: http://adobe.com/\n[2]: http://aged-and-distilled.com/napkin/\n[3]: http://www.ci.bainbridge-isl.wa.us/"]]]
56 | 
57 |     assert_equal exp, Feedtxt::INI.parse( text )
58 |     assert_equal exp, Feedtxt.parse( text )   ## try shortcut alias too
59 |   end
60 | 
61 | 
62 | end # class TestIni
63 | 


--------------------------------------------------------------------------------
/feedtxt/test/test_json.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_json.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | class TestJson < MiniTest::Test
11 | 
12 |   def test_example
13 | 
14 |     text = read_text( 'spec/example.json' )
15 |     pp text
16 | 
17 |     exp = [
18 |       {"title"=>"My Example Feed",
19 |   "home_page_url"=>"https://example.org/",
20 |   "feed_url"=>"https://example.org/feed.txt"},
21 |     [[
22 |       {"id"=>"2", "url"=>"https://example.org/second-item"},
23 |        "This is a second item."
24 |      ],
25 |      [
26 |        {"id"=>"1", "url"=>"https://example.org/initial-post"},
27 |        "Hello, world!"
28 |     ]]]
29 | 
30 |     assert_equal exp, Feedtxt::JSON.parse( text )
31 |     assert_equal exp, Feedtxt.parse( text )     ## try shortcut alias too
32 |   end
33 | 
34 |   def test_podcast
35 | 
36 |     text = read_text( 'spec/podcast.json' )
37 |     pp text
38 | 
39 |     exp =[{"comment"=>
40 |    "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
41 |   "title"=>"The Record",
42 |   "home_page_url"=>"http://therecord.co/",
43 |   "feed_url"=>"http://therecord.co/feed.txt"},
44 |  [[{"id"=>"http://therecord.co/chris-parrish",
45 |     "title"=>"Special #1 - Chris Parrish",
46 |     "url"=>"http://therecord.co/chris-parrish",
47 |     "summary"=>
48 |      "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
49 |     "published"=> "2014-05-09T14:04:00-07:00",
50 |     "attachments"=>
51 |      [{"url"=>
52 |         "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
53 |        "mime_type"=>"audio/x-m4a",
54 |        "size_in_bytes"=>89970236,
55 |        "duration_in_seconds"=>6629}]},
56 |    "Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.\nChris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),\na Mac app for visual collaboration. Chris is also the co-host of The Record.\nHe lives on [Bainbridge Island][3], a quick ferry ride from Seattle.\n\n[1]: http://adobe.com/\n[2]: http://aged-and-distilled.com/napkin/\n[3]: http://www.ci.bainbridge-isl.wa.us/"]]]
57 | 
58 |     assert_equal exp, Feedtxt::JSON.parse( text )
59 |     assert_equal exp, Feedtxt.parse( text )   ## try shortcut alias too
60 |   end
61 | 
62 | 
63 | end # class TestYaml
64 | 


--------------------------------------------------------------------------------
/feedtxt/test/test_scanner.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_scanner.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | class TestScanner < MiniTest::Test
11 | 
12 |   ## note:
13 |   ##   regex excape  pipe: | to \|
14 |   ##   note: \\ needs to get escaped twice e.g. (\\ becomes \)
15 |   FEED_BEGIN = %{^[ ]*\\|>>>+[ ]*$}
16 |   FEED_END   = %{^[ ]*<<<+\\|[ ]*$}
17 | 
18 |   def test_scan
19 | 
20 | text =<<TXT
21 | bla bla bla
22 | |>>>
23 | title:          "My Example Feed"
24 | home_page_url:  "https://example.org/"
25 | feed_url:       "https://example.org/feed.txt"
26 | </>
27 | id:  "2"
28 | url: "https://example.org/second-item"
29 | ---
30 | This is a second item.
31 | </>
32 | id:  "1"
33 | url: "https://example.org/initial-post"
34 | ---
35 | Hello, world!
36 | <<<|
37 | TXT
38 | 
39 |     s = StringScanner.new( text )
40 | 
41 |     prolog = s.scan_until( /(?=#{FEED_BEGIN})/ )
42 |     pp prolog
43 | 
44 |     feed_begin = s.scan( /#{FEED_BEGIN}/ )
45 |     assert_equal '|>>>', feed_begin
46 | 
47 |     body =  s.scan_until( /(?=#{FEED_END})/ )
48 |     pp body
49 | 
50 |     feed_end = s.scan( /#{FEED_END}/ )
51 |     assert_equal '<<<|', feed_end
52 | 
53 |     assert true
54 |   end
55 | 
56 | end # class TestScanner
57 | 


--------------------------------------------------------------------------------
/feedtxt/test/test_yaml.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_yaml.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | class TestYaml < MiniTest::Test
11 | 
12 |   def test_example
13 | 
14 |     text = read_text( 'spec/example.yaml' )
15 |     pp text
16 | 
17 |     exp = [
18 |       {"title"=>"My Example Feed",
19 |   "home_page_url"=>"https://example.org/",
20 |   "feed_url"=>"https://example.org/feed.txt"},
21 |     [[
22 |       {"id"=>"2", "url"=>"https://example.org/second-item"},
23 |        "This is a second item."
24 |      ],
25 |      [
26 |        {"id"=>"1", "url"=>"https://example.org/initial-post"},
27 |        "Hello, world!"
28 |     ]]]
29 | 
30 |     assert_equal exp, Feedtxt::YAML.parse( text )
31 |     assert_equal exp, Feedtxt.parse( text )     ## try shortcut alias too
32 |   end
33 | 
34 |   def test_podcast
35 |     ## read the podcast sample (spec/podcast.yaml) and compare the parsed result with the expected structure
36 |     text = read_text( 'spec/podcast.yaml' )
37 |     pp text
38 |     ## expected result: [feed metadata, [[item metadata, item text]]]
39 |     exp =[{"comment"=>
40 |    "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
41 |   "title"=>"The Record",
42 |   "home_page_url"=>"http://therecord.co/",
43 |   "feed_url"=>"http://therecord.co/feed.txt"},
44 |  [[{"id"=>"http://therecord.co/chris-parrish",
45 |     "title"=>"Special #1 - Chris Parrish",
46 |     "url"=>"http://therecord.co/chris-parrish",
47 |     "summary"=>
48 |      "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
49 |     "published"=>DateTime.new( 2014, 5, 9, 23, 4, 0, '+02'),
50 |     "attachments"=>
51 |      [{"url"=>
52 |         "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
53 |        "mime_type"=>"audio/x-m4a",
54 |        "size_in_bytes"=>89970236,
55 |        "duration_in_seconds"=>6629}]},
56 |    "Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.\nChris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),\na Mac app for visual collaboration. Chris is also the co-host of The Record.\nHe lives on [Bainbridge Island][3], a quick ferry ride from Seattle.\n\n[1]: http://adobe.com/\n[2]: http://aged-and-distilled.com/napkin/\n[3]: http://www.ci.bainbridge-isl.wa.us/"]]]
57 | 
58 |     assert_equal exp, Feedtxt::YAML.parse( text )
59 |     assert_equal exp, Feedtxt.parse( text )   ## try shortcut alias too
60 |   end
61 | 
62 | 
63 | end # class TestYaml
64 | 


--------------------------------------------------------------------------------
/hyperdata/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /spec/examples.txt
 9 | /test/tmp/
10 | /test/version_tmp/
11 | /tmp/
12 | 
13 | # Used by dotenv library to load environment variables.
14 | # .env
15 | 
16 | ## Specific to RubyMotion:
17 | .dat*
18 | .repl_history
19 | build/
20 | *.bridgesupport
21 | build-iPhoneOS/
22 | build-iPhoneSimulator/
23 | 
24 | ## Specific to RubyMotion (use of CocoaPods):
25 | #
26 | # We recommend against adding the Pods directory to your .gitignore. However
27 | # you should judge for yourself, the pros and cons are mentioned at:
28 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29 | #
30 | # vendor/Pods/
31 | 
32 | ## Documentation cache and generated files:
33 | /.yardoc/
34 | /_yardoc/
35 | /doc/
36 | /rdoc/
37 | 
38 | ## Environment normalization:
39 | /.bundle/
40 | /vendor/bundle
41 | /lib/bundler/man/
42 | 
43 | # for a library or gem, you might want to ignore these files since the code is
44 | # intended to run in multiple environments; otherwise, check them in:
45 | # Gemfile.lock
46 | # .ruby-version
47 | # .ruby-gemset
48 | 
49 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50 | .rvmrc
51 | 


--------------------------------------------------------------------------------
/hyperdata/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | 
2 | ### 0.0.1 / 2017-06-11
3 | 
4 | * Everything is new. First release
5 | 


--------------------------------------------------------------------------------
/hyperdata/Manifest.txt:
--------------------------------------------------------------------------------
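 1 | CHANGELOG.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | lib/hyperdata.rb
 6 | lib/hyperdata/builder/article.rb
 7 | lib/hyperdata/feed.rb
 8 | lib/hyperdata/item.rb
 9 | lib/hyperdata/parser.rb
10 | lib/hyperdata/version.rb
11 | 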


--------------------------------------------------------------------------------
/hyperdata/README.md:
--------------------------------------------------------------------------------
  1 | # hyperdata gem - turn hypertext from web pages into structured data (supports Feed.HTML n friends)
  2 | 
  3 | 
  4 | * home  :: [github.com/feedhtml/hyperdata](https://github.com/feedhtml/hyperdata)
  5 | * bugs  :: [github.com/feedhtml/hyperdata/issues](https://github.com/feedhtml/hyperdata/issues)
  6 | * gem   :: [rubygems.org/gems/hyperdata](https://rubygems.org/gems/hyperdata)
  7 | * rdoc  :: [rubydoc.info/gems/hyperdata](http://rubydoc.info/gems/hyperdata)
  8 | * forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake)
  9 | 
 10 | 
 11 | ## What's Feed.HTML? - A Free Feeds Format in HyperText Markup Language (HTML) w/ Structured Meta Data
 12 | 
 13 | What's Feed.HTML? Let's start with an example from the Microformats v2 `h-entry` spec:
 14 | 
 15 | ``` html
 16 | <article class="h-entry">
 17 |   <h1 class="p-name">Microformats are amazing</h1>
 18 |   <p>Published by <a class="p-author h-card" href="http://example.com">W. Developer</a>
 19 |      on <time class="dt-published" datetime="2013-06-13 12:00:00">13<sup>th</sup> June 2013</time>
 20 |  
 21 |   <p class="p-summary">In which I extoll the virtues of using microformats.</p>
 22 |  
 23 |   <div class="e-content">
 24 |     <p>Blah blah blah</p>
 25 |   </div>
 26 | </article>
 27 | ```
 28 | 
 29 | Let's try to make it simpler and easier. Why in 2017 still (re)use `class` for microformats / microdata? 
 30 | Let's use `o` for object types / structs / scopes and `x` for (object) props / property keys:
 31 | 
 32 | ``` html
 33 | <article o=item>
 34 |   <h1 x=title>Microformats are amazing</h1>
 35 |   <p>Published by <a o=card x=author href="http://example.com">W. Developer</a>
 36 |      on <time x=published datetime="2013-06-13 12:00:00">13<sup>th</sup> June 2013</time>
 37 |  
 38 |   <p x=summary>In which I extoll the virtues of using microformats.</p>
 39 |  
 40 |   <div x=content>
 41 |     <p>Blah blah blah</p>
 42 |   </div>
 43 | </article>
 44 | ```
 45 | 
 46 | Why `o` and `x`? and not let's say `p` and `q`? The idea is to use letters that are not already used in single-letter tags
 47 | and that are easy to remember - think: tic-tac-toe-like ;-)
 48 | 
 49 | 
 50 | Parsed to JSON resulting in:
 51 | 
 52 | ``` json
 53 | {
 54 |    "title": "Microformats are amazing",
 55 |    "author": "W. Developer",
 56 |    "card":   { "name": "W. Developer",
 57 |                "url":  "http://example.com"
 58 |              },
 59 |    "published": "2013-06-13 12:00:00",
 60 |    "summary": "In which I extoll the virtues of using microformats.",
 61 |    "content": "<p>Blah blah blah</p>"
 62 | }
 63 | 
 64 | ```
 65 | 
 66 | ### Shortcuts / Alternatives
 67 | 
 68 | #### Use hfeed / hitem / hcard
 69 | 
 70 | As an alternative you can use the `hfeed` or `feed` (for `o=feed`), `hitem` or `item` (for `o=item`),
 71 | `hcard` or `card` (for `o=card`) shortcuts. Let's (re)try:
 72 | 
 73 | ``` html
 74 | <article item>
 75 |   <h1 title>Microformats are amazing</h1>
 76 |   <p>Published by <a card author href="http://example.com">W. Developer</a>
 77 |      on <time published datetime="2013-06-13 12:00:00">13<sup>th</sup> June 2013</time>
 78 |  
 79 |   <p summary>In which I extoll the virtues of using microformats.</p>
 80 |  
 81 |   <div content>
 82 |     <p>Blah blah blah</p>
 83 |   </div>
 84 | </article>
 85 | ```
 86 | 
 87 | 
 88 | #### Use "predefined" convention over configuration structures
 89 | 
 90 | As an alternative you can use the "recommended" predefined convention over configuration
 91 | structure. Let's (re)try:
 92 | 
 93 | ``` html
 94 | <article>
 95 |   <h1>Microformats are amazing</h1>
 96 |   <p>Published by <a href="http://example.com">W. Developer</a>
 97 |      on <time datetime="2013-06-13 12:00:00">13<sup>th</sup> June 2013</time>
 98 |  
 99 |   <p>In which I extoll the virtues of using microformats.</p>
100 |  
101 |   <div>
102 |     <p>Blah blah blah</p>
103 |   </div>
104 | </article>
105 | ```
106 | 
107 | E.g.:
108 | 
109 | - Use article for your item.
110 | - Use heading (h1) for your title.
111 | - The first paragraph (p) for your metadata block with author and published date.
112 |   - The first time (time) is the published date.
113 |   - The first anchor link (a) is the author.
114 | - Optional: The second paragraph (p) is the summary.
115 | - The first division (div) is the content.
116 | 
117 | 
118 | 
119 | ## Usage
120 | 
121 | To be done.
122 | 
123 | 
124 | ## License
125 | 
126 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png)
127 | 
128 | The Feed.HTML format & conventions
129 | and the `hyperdata` scripts are dedicated to the public domain.
130 | Use it as you please with no restrictions whatsoever.
131 | 
132 | ## Questions? Comments?
133 | 
134 | Send them along to the [wwwmake Forum/Mailing List](http://groups.google.com/group/wwwmake).
135 | Thanks!
136 | 
137 | 


--------------------------------------------------------------------------------
/hyperdata/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/hyperdata/version.rb'
 3 | 
 4 | Hoe.spec 'hyperdata' do
 5 | 
 6 |   self.version = Hyperdata::VERSION
 7 | 
 8 |   self.summary = "hyperdata - turn hypertext from web pages into structured data (supports Feed.HTML n friends)"
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/feedhtml/hyperdata']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'wwwmake@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for gihub formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.licenses = ['Public Domain']
21 | 
22 |   self.spec_extras = {
23 |     required_ruby_version: '>= 1.9.2'
24 |   }
25 | 
26 | end
27 | 


--------------------------------------------------------------------------------
/hyperdata/lib/hyperdata.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | # core and stdlibs
 5 | 
 6 | require 'json'
 7 | require 'date'
 8 | require 'time'
 9 | require 'pp'
10 | 
11 | 
12 | # 3rd party gems/libs
13 | require 'logutils'
14 | 
15 | require 'nokogiri'
16 | 
17 | 
18 | # our own code
19 | require 'hyperdata/version'  # let it always go first
20 | 
21 | require 'hyperdata/feed'
22 | require 'hyperdata/item'
23 | 
24 | require 'hyperdata/builder/article'
25 | require 'hyperdata/parser'
26 | 
27 | 
28 | 
29 | # say hello - prints the version banner, but only if $DEBUG is on or $RUBYLIBS_DEBUG is defined and set
30 | puts Hyperdata.banner     if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
31 | 


--------------------------------------------------------------------------------
/hyperdata/lib/hyperdata/builder/article.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Hyperdata
 4 | 
 5 | class ArticleFeedBuilder
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 | 
10 |   def self.build( doc )
11 |     feed = self.new( doc )
12 |     feed.to_feed
13 |   end
14 | 
15 |   def initialize( doc )
16 |     @feed = build_feed( doc )
17 |   end
18 | 
19 |   def to_feed() @feed; end
20 | 
21 | 
22 | 
23 | 
24 |   def build_feed( doc )
25 |     feed = Feed.new
26 | 
27 |     ## todo: find title from page_url
28 | 
29 |     articles = doc.css( 'article' )
30 |     pp articles.size
31 |     pp articles
32 | 
33 |     articles.each do |article|
34 |       feed.items << build_item( article )
35 |     end
36 | 
37 |     feed # return new feed
38 |   end # method build_feed
39 | 
40 | 
41 | 
42 | 
43 |   def build_item( ht )
44 |     item = Item.new   # Item.new
45 | 
46 |     ## check for h1
47 | 
48 |     headings = ht.css( 'h1' )
49 |     if headings.any?
50 |       item.title = headings[0].text
51 |     end
52 | 
53 |     paras = ht.css( 'p' )
54 |     if paras[1]   ## quick hack: for now assume 2nd para is summary if present
55 |       item.summary = paras[1].text
56 |     end
57 | 
58 |     item
59 |   end # method build_item
60 | 
61 | end # ArticleFeedBuilder
62 | end # Hyperdata
63 | 


--------------------------------------------------------------------------------
/hyperdata/lib/hyperdata/feed.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Hyperdata
 4 | 
 5 | class Feed
 6 | 
 7 |   attr_accessor :title
 8 |   attr_accessor :url        ## todo - add alias site_url/home_page_url/page_url - why? why not??
 9 |   attr_accessor :feed_url
10 | 
11 |   attr_accessor :items
12 | 
13 |   def initialize
14 |     ## note: make items empty arrays on startup (e.g. not nil)
15 |     @items   = []
16 |   end
17 | 
18 | end  # class Feed
19 | 
20 | end # module Hyperdata
21 | 


--------------------------------------------------------------------------------
/hyperdata/lib/hyperdata/item.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Hyperdata
 4 | 
 5 | class Item
 6 | 
 7 |   attr_accessor :title
 8 |   attr_accessor :url
 9 | 
10 | 
11 |   ## note: only content/content_html should use html;
12 |   ##  all others (e.g. title/summary/content_text) shoud be plain (vanilla) text
13 | 
14 | 
15 |   def content?()  @content.nil? == false;  end
16 |   attr_accessor  :content
17 | 
18 |   ## note: content_html is an alias for content
19 |   ##   will hold type html/xhtml/html-escaped  - check if always converted to string by parser ??
20 |   alias :content_html  :content
21 |   alias :content_html= :content=
22 |   alias :content_html? :content?
23 | 
24 | 
25 |   def content_text?()  @content_text.nil? == false;  end
26 |   attr_accessor  :content_text
27 | 
28 | 
29 | 
30 |   def summary?()  @summary.nil? == false;  end
31 |   attr_accessor   :summary
32 | 
33 | 
34 |   def updated?()  @updated.nil? == false;  end
35 |   attr_accessor :updated
36 |   attr_accessor :updated_local  # "unparsed" local datetime as in feed (NOT converted to utc)
37 | 
38 |   def published?()  @published.nil? == false;  end
39 |   attr_accessor :published  # note: published is basically an alias for created
40 |   attr_accessor :published_local   # "unparsed" local datetime as in feed (NOT converted to utc)
41 | 
42 |   attr_accessor :id
43 | 
44 | end  # class Item
45 | 
46 | end # module Hyperdata
47 | 


--------------------------------------------------------------------------------
/hyperdata/lib/hyperdata/parser.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | module Hyperdata
 5 | 
 6 | 
 7 | class Parser
 8 | 
 9 |   include LogUtils::Logging
10 | 
11 | 
12 |   ### convenience class/factory method
13 |   def self.parse( text, opts={} )
14 |     self.new( text ).parse
15 |   end
16 | 
17 |   ### Note: lets keep/use same API as RSS::Parser for now
18 |   def initialize( text )
19 |     @text = text
20 |   end
21 | 
22 | 
23 | 
24 |   def parse
25 |     @doc = Nokogiri::HTML( @text )
26 | 
27 |     @feed = ArticleFeedBuilder.build( @doc )
28 |     @feed    # return feed for now  (use a (Hyper)FeedParser instead of "generic" Parser - why? why not?)
29 |   end # method parse
30 | 
31 | 
32 | end  # class Parser
33 | end # module Hyperdata
34 | 


--------------------------------------------------------------------------------
/hyperdata/lib/hyperdata/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Hyperdata
 4 | 
 5 |   MAJOR = 0
 6 |   MINOR = 0
 7 |   PATCH = 1
 8 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
 9 | 
10 |   def self.version
11 |     VERSION
12 |   end
13 | 
14 |   def self.banner
15 |     "hyperdata/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16 |   end
17 | 
18 |   def self.root
19 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20 |   end
21 | 
22 | end # module Hyperdata
23 | 


--------------------------------------------------------------------------------
/hyperdata/sandbox/dumps/article.html.txt:
--------------------------------------------------------------------------------
 1 | #(Document:0x20537e8 {
 2 |   name = "document",
 3 |   children = [
 4 |     #(DTD:0x2053314 { name = "html" }),
 5 |     #(Element:0x2052f3c {
 6 |       name = "html",
 7 |       children = [
 8 |         #(Element:0x2052a20 {
 9 |           name = "body",
10 |           children = [
11 |             #(Element:0x2052534 {
12 |               name = "article",
13 |               children = [
14 |                 #(Text "\n  "),
15 |                 #(Element:0x2057dfc {
16 |                   name = "h1",
17 |                   children = [ #(Text "Microformats are amazing")]
18 |                   }),
19 |                 #(Text "\n  "),
20 |                 #(Element:0x205734c {
21 |                   name = "p",
22 |                   children = [
23 |                     #(Text "Published by "),
24 |                     #(Element:0x2056cd4 {
25 |                       name = "a",
26 |                       attributes = [
27 |                         #(Attr:0x2056b48 {
28 |                           name = "href",
29 |                           value = "http://example.com"
30 |                           })],
31 |                       children = [ #(Text "W. Developer")]
32 |                       }),
33 |                     #(Text "\n     on "),
34 |                     #(Element:0x205bc60 {
35 |                       name = "time",
36 |                       attributes = [
37 |                         #(Attr:0x205bab0 {
38 |                           name = "datetime",
39 |                           value = "2013-06-13 12:00:00"
40 |                           })],
41 |                       children = [
42 |                         #(Text "13"),
43 |                         #(Element:0x205af70 {
44 |                           name = "sup",
45 |                           children = [ #(Text "th")]
46 |                           }),
47 |                         #(Text " June 2013")]
48 |                       }),
49 |                     #(Text "\n\n  ")]
50 |                   }),
51 |                 #(Element:0x205a2f8 {
52 |                   name = "p",
53 |                   children = [
54 |                     #(Text "In which I extoll the virtues of using microformats.")]
55 |                   }),
56 |                 #(Text "\n\n  "),
57 |                 #(Element:0x205f998 {
58 |                   name = "div",
59 |                   children = [
60 |                     #(Text "\n    "),
61 |                     #(Element:0x205f320 {
62 |                       name = "p",
63 |                       children = [ #(Text "Blah blah blah")]
64 |                       }),
65 |                     #(Text "\n  ")]
66 |                   }),
67 |                 #(Text "\n")]
68 |               }),
69 |             #(Text "\n")]
70 |           })]
71 |       })]
72 |   })
73 | 


--------------------------------------------------------------------------------
/hyperdata/sandbox/dumps/o-item.html.txt:
--------------------------------------------------------------------------------
 1 | #(Document:0x210b7c0 {
 2 |   name = "document",
 3 |   children = [
 4 |     #(DTD:0x210b2ec { name = "html" }),
 5 |     #(Element:0x210af14 {
 6 |       name = "html",
 7 |       children = [
 8 |         #(Element:0x210a9f8 {
 9 |           name = "body",
10 |           children = [
11 |             #(Element:0x210a50c {
12 |               name = "article",
13 |               attributes = [ #(Attr:0x210a26c { name = "o", value = "item" })],
14 |               children = [
15 |                 #(Text "\n  "),
16 |                 #(Element:0x210f72c {
17 |                   name = "h1",
18 |                   attributes = [
19 |                     #(Attr:0x210f5a0 { name = "x", value = "title" })],
20 |                   children = [ #(Text "Microformats are amazing")]
21 |                   }),
22 |                 #(Text "\n  "),
23 |                 #(Element:0x210e730 {
24 |                   name = "p",
25 |                   children = [
26 |                     #(Text "Published by "),
27 |                     #(Element:0x210e0b8 {
28 |                       name = "a",
29 |                       attributes = [
30 |                         #(Attr:0x2113f14 { name = "o", value = "card" }),
31 |                         #(Attr:0x2113f08 { name = "x", value = "author" }),
32 |                         #(Attr:0x2113efc {
33 |                           name = "href",
34 |                           value = "http://example.com"
35 |                           })],
36 |                       children = [ #(Text "W. Developer")]
37 |                       }),
38 |                     #(Text "\n     on "),
39 |                     #(Element:0x211269c {
40 |                       name = "time",
41 |                       attributes = [
42 |                         #(Attr:0x21124ec { name = "x", value = "published" }),
43 |                         #(Attr:0x21124e0 {
44 |                           name = "datetime",
45 |                           value = "2013-06-13 12:00:00"
46 |                           })],
47 |                       children = [
48 |                         #(Text "13"),
49 |                         #(Element:0x21174c0 {
50 |                           name = "sup",
51 |                           children = [ #(Text "th")]
52 |                           }),
53 |                         #(Text " June 2013")]
54 |                       }),
55 |                     #(Text "\n\n  ")]
56 |                   }),
57 |                 #(Element:0x2116848 {
58 |                   name = "p",
59 |                   attributes = [
60 |                     #(Attr:0x2116698 { name = "x", value = "summary" })],
61 |                   children = [
62 |                     #(Text "In which I extoll the virtues of using microformats.")]
63 |                   }),
64 |                 #(Text "\n \n  "),
65 |                 #(Element:0x211b8d0 {
66 |                   name = "div",
67 |                   attributes = [
68 |                     #(Attr:0x211b720 { name = "x", value = "content" })],
69 |                   children = [
70 |                     #(Text "\n    "),
71 |                     #(Element:0x211ac40 {
72 |                       name = "p",
73 |                       children = [ #(Text "Blah blah blah")]
74 |                       }),
75 |                     #(Text "\n  ")]
76 |                   }),
77 |                 #(Text "\n")]
78 |               }),
79 |             #(Text "\n")]
80 |           })]
81 |       })]
82 |   })
83 |   
84 | 


--------------------------------------------------------------------------------
/hyperdata/test/feeds/spec/article.html:
--------------------------------------------------------------------------------
 1 | <article>
 2 |   <h1>Microformats are amazing</h1>
 3 |   <p>Published by <a href="http://example.com">W. Developer</a>
 4 |      on <time datetime="2013-06-13 12:00:00">13<sup>th</sup> June 2013</time>
 5 | 
 6 |   <p>In which I extoll the virtues of using microformats.</p>
 7 | 
 8 |   <div>
 9 |     <p>Blah blah blah</p>
10 |   </div>
11 | </article>
12 | 


--------------------------------------------------------------------------------
/hyperdata/test/feeds/spec/o/item.html:
--------------------------------------------------------------------------------
 1 | <article o=item>
 2 |   <h1 x=title>Microformats are amazing</h1>
 3 |   <p>Published by <a o=card x=author href="http://example.com">W. Developer</a>
 4 |      on <time x=published datetime="2013-06-13 12:00:00">13<sup>th</sup> June 2013</time>
 5 | 
 6 |   <p x=summary>In which I extoll the virtues of using microformats.</p>
 7 |  
 8 |   <div x=content>
 9 |     <p>Blah blah blah</p>
10 |   </div>
11 | </article>
12 | 


--------------------------------------------------------------------------------
/hyperdata/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | 
 4 | ## minitest setup
 5 | 
 6 | require 'minitest/autorun'
 7 | 
 8 | require 'logutils'
 9 | require 'textutils'
10 | 
11 | 
12 | ## our own code
13 | require 'hyperdata'
14 | 
15 | 
16 | ## set the level of the (logutils) root logger to debug for the test runs
17 | LogUtils::Logger.root.level = :debug
18 | 
19 | 
20 | def read_text( name )
21 |   text = File.read( "#{Hyperdata.root}/test/feeds/#{name}.html" )
22 |   text
23 | end
24 | 


--------------------------------------------------------------------------------
/hyperdata/test/test_article.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_article.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | class TestArticle < MiniTest::Test
11 | 
12 |   def test_article
13 |     text = read_text( 'spec/article' )
14 |     ## text = read_text( 'spec/o/item' )
15 |     feed = Hyperdata::Parser.parse( text )
16 |     pp feed
17 | 
18 |     assert true
19 |   end
20 | 
21 | end # class TestArticle
22 | 


--------------------------------------------------------------------------------
/hyperdata/test/test_version.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_version.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | class TestVersion < MiniTest::Test
11 | 
12 |   def test_version
13 | 
14 |     puts "Hyperdata: #{Hyperdata::VERSION}"
15 | 
16 |     assert true
17 |   end
18 | 
19 | end # class TestVersion
20 | 


--------------------------------------------------------------------------------