├── LICENSE.md ├── README.md ├── awesome-feeds ├── FORMATS.md ├── METADATA.md ├── README.md └── TIMELINE.md ├── feedfilter ├── .gitignore ├── CHANGELOG.md ├── Manifest.txt ├── README.md ├── Rakefile ├── config │ ├── feedburner.txt │ └── feedflare.txt ├── lib │ ├── feedfilter.rb │ └── feedfilter │ │ ├── ads.rb │ │ ├── includes.rb │ │ └── version.rb └── test │ ├── helper.rb │ ├── test_ads.rb │ ├── test_ads_all.rb │ └── test_includes.rb ├── feedfinder ├── .gitignore ├── CHANGELOG.md ├── Manifest.txt ├── README.md ├── Rakefile └── lib │ ├── feedfinder.rb │ └── feedfinder │ └── version.rb ├── feedparser ├── .gitignore ├── CHANGELOG.md ├── Manifest.txt ├── NOTES.md ├── README.md ├── Rakefile ├── attic │ ├── atom_v03.rb │ ├── feed.rb │ ├── item.rb │ ├── test_atom_from_file.rb │ └── test_atom_v03.rb ├── feed-models.png ├── lib │ ├── feedparser.rb │ └── feedparser │ │ ├── attachment.rb │ │ ├── author.rb │ │ ├── builder │ │ ├── atom.rb │ │ ├── json.rb │ │ ├── microformats.rb │ │ └── rss.rb │ │ ├── feed.rb │ │ ├── generator.rb │ │ ├── item.rb │ │ ├── parser.rb │ │ ├── tag.rb │ │ ├── thumbnail.rb │ │ └── version.rb ├── sandbox │ ├── dumps │ │ ├── intertwingly.atom.xml │ │ ├── nostarch.rss2.xml │ │ └── rubyonrails.atom.xml │ ├── testatom.rb │ ├── testpp.rb │ └── testrss.rb └── test │ ├── helper.rb │ ├── media_rss_example.txt │ ├── test_atom_live.rb │ ├── test_attachments_live.rb │ ├── test_dates.rb │ ├── test_microformats.rb │ └── test_rss_live.rb ├── feeds ├── NOTES.md ├── README.md ├── Rakefile ├── books │ ├── nostarch.rss │ ├── oreilly.feedburner.atom │ └── pragprog.rss ├── comics │ ├── xkcd.atom │ └── xkcd.rss ├── misc │ ├── byparker.json │ ├── daringfireball.atom │ ├── daringfireball.json │ ├── googlegroups.atom │ ├── googlegroups2.atom │ ├── headius.atom │ ├── indie-blog.html │ ├── inessential.json │ ├── intertwingly.atom │ ├── jsonfeed.json │ ├── lambdatheultimate.rss │ ├── learnenough.feedburner.atom │ ├── ongoing.atom │ ├── railstutorial.feedburner.atom │ ├── 
rubyflow.feedburner.rss │ ├── rubymine.feedburner.rss │ ├── rubyonrails.atom │ ├── scripting.rss │ └── sitepoint.rss ├── news │ ├── guardian-facebook.rss │ ├── guardian-naomi-klein.rss │ ├── guardian-world.rss │ ├── nytimes-blogs-bits.rss │ ├── nytimes-paul-krugman.rss │ ├── nytimes-tech.rss │ ├── nytimes-thomas-l-friedman.rss │ ├── nytimes.rss │ ├── washingtonpost-blogs-innovations.rss │ ├── washingtonpost-politics.rss │ └── washingtonpost-world.rss ├── osm │ ├── blog.openstreetmap.rss │ ├── blogs.openstreetmap.rss │ └── mapbox.rss ├── spec │ ├── atom │ │ ├── author.atom │ │ ├── authors.atom │ │ └── categories.atom │ ├── json │ │ ├── example.json │ │ ├── microblog.json │ │ └── tags.json │ ├── microformats │ │ └── hentry.html │ └── rss │ │ ├── author.rss │ │ ├── categories.rss │ │ └── creator.rss └── test │ ├── helper.rb │ └── test_feeds.rb ├── feedtxt.specs ├── README.md └── _includes │ └── header.html ├── feedtxt ├── .gitignore ├── HISTORY.md ├── Manifest.txt ├── README.md ├── Rakefile ├── lib │ ├── feedtxt.rb │ └── feedtxt │ │ ├── parser.rb │ │ ├── parser │ │ ├── ini.rb │ │ ├── json.rb │ │ └── yaml.rb │ │ └── version.rb └── test │ ├── feeds │ └── spec │ │ ├── example.ini.txt │ │ ├── example.json.txt │ │ ├── example.yaml.txt │ │ ├── podcast.ini.txt │ │ ├── podcast.json.txt │ │ └── podcast.yaml.txt │ ├── helper.rb │ ├── test_ini.rb │ ├── test_json.rb │ ├── test_scanner.rb │ └── test_yaml.rb └── hyperdata ├── .gitignore ├── CHANGELOG.md ├── Manifest.txt ├── README.md ├── Rakefile ├── lib ├── hyperdata.rb └── hyperdata │ ├── builder │ └── article.rb │ ├── feed.rb │ ├── item.rb │ ├── parser.rb │ └── version.rb ├── sandbox └── dumps │ ├── article.html.txt │ └── o-item.html.txt └── test ├── feeds └── spec │ ├── article.html │ └── o │ └── item.html ├── helper.rb ├── test_article.rb └── test_version.rb /LICENSE.md: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most 
jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. 
moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). 
Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 
92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | <http://creativecommons.org/publicdomain/zero/1.0/> 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # (Universal) Web Feed Parser And Normalizer (Atom, RSS, JSON Feed, Feed.TXT, Feed.HTML, HTML w/ Microformats e.g. h-entry, Etc.) And More 2 | 3 | 4 | Gem Family 5 | 6 | [**feedparser**](feedparser) - web feed parser and normalizer (Atom, RSS, JSON Feed, HTML h-entry, etc.) 
7 | 8 | [**feedfilter**](feedfilter) - feed filter and rules for easy (re)use 9 | 10 | [**feedtxt**](feedtxt) - reads Feed.TXT - feeds in text (unicode) - publish & share posts, articles, podcasts, 'n' more 11 | 12 | 13 | 14 | 15 | More 16 | 17 | [**feeds**](feeds) - tests, tests, tests - feeds (in Atom, RSS, JSON Feed, HTML h-entry, etc.) with test assertions 18 | 19 | [**awesome-feeds**](awesome-feeds) - a collection of awesome feeds (formats, readers, services & tools, templates 'n' more) - JSON Feed, RSS, Atom, HTML w/ Microformats, Feed.TXT 20 | 21 | -------------------------------------------------------------------------------- /awesome-feeds/FORMATS.md: -------------------------------------------------------------------------------- 1 | 2 | # Awesome Feeds > Formats 3 | 4 | 5 | Note: (†) used for historic formats (no longer in use or obsolete or were just experimental) 6 | 7 | 8 | ## RSS "Family" 9 | 10 | 11 | ### RSS 0.9 (†) (RDF Site Summary) 12 | 13 | by Netscape 14 | 15 | 16 | ### RDF / RSS 1.0 (†) (RDF Site Summary) 17 | 18 | by rdf-dev-group 19 | 20 | 21 | ### RSS 2.0, 0.92 (†), 0.91 (†) (Really Simple Syndication) 22 | 23 | by Dave Winer (DW) - formerly UserLand 24 | 25 | #### RSS.js / RSS.json / RSS-in-JSON / RSS-in-JS by Dave Winer 26 | 27 | See [`rssjs.org`](http://rssjs.org). 28 | 29 | - [RSS-in-JSON is a feed format](https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md) by Dave Winer, June 2017 30 | 31 | 32 | ### RSS 3.0 (†) 33 | 34 | by Aaron Swartz 35 | 36 | See [RSS 3.0](http://www.aaronsw.com/2002/rss30), 2002 37 | 38 | - [The Road to RSS 3.0](http://www.aaronsw.com/weblog/000574) by Aaron Swartz, September 2002 39 | 40 | Plain Text with key value pairs and multi-line values with indentation. 
41 | 42 | 43 | 44 | ### RSS 5.0 (Really Simple, Stupid or Really Simple Sharing) 45 | 46 | See Feed.TXT 47 | 48 | 49 | ### More RSS 50 | 51 | - [RSS @ Wikipedia](https://en.wikipedia.org/wiki/RSS) 52 | 53 | 54 | ## Atom 55 | 56 | - [Atom (standard) @ Wikipedia](https://en.wikipedia.org/wiki/Atom_(standard)) 57 | 58 | 59 | 60 | ## JSON Formats 61 | 62 | ### JSON Feed 63 | 64 | See [`jsonfeed.org`](https://jsonfeed.org). 65 | 66 | ### Activity Streams 67 | 68 | See [`activitystrea.ms`](http://activitystrea.ms) 69 | 70 | - [Activity_Streams (format) @ Wikipedia](https://en.wikipedia.org/wiki/Activity_Streams_(format)) 71 | 72 | ### Collection+JSON 73 | 74 | See [Collection+JSON - Hypermedia Type](http://amundsen.com/media-types/collection/) 75 | 76 | Collection+JSON is a JSON-based read/write hypermedia-type designed to support management and querying of simple collections. It is similar to the The Atom Syndication Format (RFC4287) and the The Atom Publishing Protocol (RFC5023) . However, Collection+JSON defines both the format and the semantics in a single media type. It also includes support for Query Templates and expanded write support through the use of a Write Template. 77 | 78 | 79 | 80 | 81 | ## YAML Feed (†) 82 | 83 | - [YAMLFeed @ Indie Web](https://indieweb.org/YAMLFeed) (twitter: [yamlfeed](https://twitter.com/yamlfeed)) - Launched as a "practical" lulz joke - keep the formats wars (eg. rss vs atom) alive (e.g. now json vs yaml). 84 | 85 | 86 | ## Microformats 87 | 88 | ### Microformats V1 hentry/hatom 89 | 90 | ### Microformats V2 h-entry/h-feed 91 | 92 | 93 | 94 | ## Feed.TXT 95 | 96 | See [Feed.TXT](https://feedtxt.github.io). 
97 | 98 | A Free Feeds Format in Plain Text w/ Structured Meta Data 99 | 100 | 101 | 102 | 103 | ## More 104 | 105 | -------------------------------------------------------------------------------- /awesome-feeds/METADATA.md: -------------------------------------------------------------------------------- 1 | 2 | # Awesome Feeds > Meta Data 3 | 4 | How many ways to add 5 | 6 | - Author 7 | - Title 8 | - Date 9 | 10 | Let's count ;-) 11 | 12 | 13 | 14 | ## Person / People 15 | 16 | - **creator** -- Dublin Core Meta Data 17 | - **publisher** -- Dublin Core Meta Data 18 | - **author** -- RSS 2.0, Atom, JSON Feed 19 | - **contributor** -- Atom 20 | - **managingEditor** -- RSS 2.0 Channel 21 | - **webMaster** -- RSS 2.0 Channel 22 | 23 | 24 | ## Dates 25 | 26 | - **published** -- Atom 27 | - **pubDate** -- RSS 2.0 28 | - **date_published** -- JSON Feed 29 | - **date** -- Dublin Core Meta Data 30 | - **updated** -- Atom 31 | - **date_modified** -- JSON Feed 32 | - **lastBuildDate** -- RSS 2.0 Channel 33 | 34 | 35 | ## Title 36 | 37 | - **title** -- Atom / RSS 2.0 / JSON Feed 38 | - **name** 39 | 40 | 41 | _2nd Level Title_ 42 | 43 | - **subtitle** -- Atom 44 | - **tagline** 45 | 46 | 47 | ## Summary 48 | 49 | - **summary** -- Atom / JSON Feed 50 | - **description** -- RSS 2.0 51 | - **abstract** 52 | - **excerpt** 53 | 54 | 55 | ## Content 56 | 57 | - **content** -- Atom (Defaults to Text!), RSS Yahoo! Search (Media) Extension 58 | - **content type="text|html|xhtml"** -- Atom (Defaults to Text!) 
59 | - **content_text** -- JSON Feed 60 | - **content_html** -- JSON Feed 61 | - **content:encoded** -- RDF Content Module 62 | 63 | 64 | 65 | ## Tags / Categories 66 | 67 | - **category** -- RSS 2.0 68 | - **category term=** -- Atom 69 | - **tags[]** -- JSON Feed 70 | - **keywords** 71 | 72 | _Scheme_ 73 | 74 | - **scheme** -- Atom 75 | - **domain** -- RSS 2.0 76 | 77 | 78 | ## Link 79 | 80 | - **url** -- JSON Feed 81 | - **link** -- RSS 2.0 82 | - **link href=** -- Atom 83 | 84 | 85 | _More Links_ 86 | 87 | - **home_page_url** -- JSON Feed (site url) 88 | - **feed_url** -- JSON Feed (feed url) 89 | - **link href= rel="self"** -- Atom (feed url) 90 | - **link href= rel="alternate"** -- Atom (site url) 91 | 92 | 93 | ## ID 94 | 95 | - **id** -- Atom, JSON Feed 96 | - **guid** -- RSS 2.0 97 | - **permalink** 98 | 99 | 100 | ## Attachments 101 | 102 | - **attachments[] url=** -- JSON Feed 103 | - **enclosure url=** -- RSS 2.0 104 | - **link href= rel="enclosure"** -- Atom 105 | 106 | _Examples_ 107 | 108 | JSON Feed: 109 | 110 | ``` json 111 | "attachments": [ 112 | { 113 | "url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a", 114 | "mime_type": "audio/x-m4a", 115 | "size_in_bytes": 89970236, 116 | "duration_in_seconds": 6629 117 | } 118 | ] 119 | ``` 120 | 121 | RSS 2.0: 122 | 123 | ``` xml 124 | 127 | ``` 128 | 129 | Atom: 130 | 131 | ``` xml 132 | 137 | 142 | ``` 143 | 144 | 145 | ## More - What's Missing? 146 | 147 | - add banner image for item / entry? 148 | - add image / cover for feed / channel? 149 | - add (fav)icon for feed / channel? 150 | - add language ? 151 | - add expired yes/no or with date? 
152 | 153 | -------------------------------------------------------------------------------- /awesome-feeds/README.md: -------------------------------------------------------------------------------- 1 | 2 | JSON Feed • RSS • Atom • HTML w/ Microformats • Feed.TXT 3 | 4 | 5 | # Awesome Feeds 6 | 7 | A collection of awesome feeds (formats, readers, services & tools, templates 'n' more). 8 | 9 | #### _Contributions welcome. Anything missing? Send in a pull request. Thanks._ 10 | 11 | 12 | 13 | [Formats](#formats) • 14 | [Parsers](#parsers) • 15 | [Filters](#filters) • 16 | [Converters](#converters) • 17 | [Readers](#readers) • 18 | [Facebook & Co](#facebook--co) 19 | 20 | 21 | 22 | ## Formats 23 | 24 | _JSON, XML, HTML, YAML, TXT & Friends_ 25 | 26 | [JSON Feed](#json-feed) • 27 | [Atom](#atom) • 28 | [RSS](#rss) • 29 | [HTML Microformats](#html-microformats) • 30 | [Feed.TXT](#feedtxt) 31 | 32 | 33 | ### JSON Feed 34 | 35 | - [**jsonfeed.org**](https://jsonfeed.org) (twitter: [jsonfeed](https://twitter.com/jsonfeed)) - JSON Feed Project Site 36 | - [**Spec V1 @ JSON Feed**](https://jsonfeed.org/version/1) - Official Specification Version 1.0 - in plain English (yeah!); May 2017 37 | - [**Code @ JSON Feed**](https://jsonfeed.org/code) - JSON Feed Templates, Plugins, Parser, Scripts & More 38 | 39 | 40 | **More** 41 | 42 | - [**Awesome JSON Feed**](https://github.com/rmlewisuk/awesome-json-feed) 43 | 44 | 45 | ### Atom 46 | 47 | _Atom Syndication & Friends_ 48 | 49 | - [**Atom Syndication Spec**](https://tools.ietf.org/html/rfc4287), December 2005 50 | 51 | 52 | ### RSS 53 | 54 | _Really Simple Syndication & Friends_ 55 | 56 | - [**RSS 2.0 Spec**](http://cyber.harvard.edu/rss/rss.html), July 2003 57 | 58 | 59 | ### HTML Microformats 60 | 61 | - [**h-feed Living Spec**](http://microformats.org/wiki/h-feed) 62 | - [**h-entry Living Spec**](http://microformats.org/wiki/h-entry) 63 | 64 | 65 | ### Feed.TXT 66 | 67 | - [**Feed.TXT**](https://feedtxt.github.io) (github: 
[feedtxt](https://github.com/feedtxt)) - Feed.TXT Project Site 68 | 69 | 70 | 71 | ## Parsers 72 | 73 | _Universal Feed Parser & Normalizer_ 74 | 75 | **Ruby** 76 | 77 | - [**feedparser**](https://github.com/feedparser/feedparser) (gem: [feedparser](https://rubygems.org/gems/feedparser)) - universal feed parser and normalizer (supports Atom, RSS, JSON, HTML, TXT etc.) 78 | 79 | 80 | 81 | ## Filters 82 | 83 | **Ruby** 84 | 85 | - [**feedfilter**](https://github.com/feedparser/feedfilter) (gem: [feedfilter](https://rubygems.org/gems/feedfilter)) - feed filter and rules for easy (re)use e.g. strip ads etc. 86 | 87 | 88 | 89 | ## Converters 90 | 91 | - [**feed2json**](https://feed2json.org) (github: [appsattic/feed2json.org](https://github.com/appsattic/feed2json.org)) - convert rss or atom to json feed 92 | 93 | 94 | ## Readers 95 | 96 | **JavaScript** 97 | 98 | - [**JSON Feed Viewer**](https://json-feed-viewer.herokuapp.com) (github: [maximevaillancourt/json-feed-viewer](https://github.com/maximevaillancourt/json-feed-viewer)) - browse through the showcased feeds, or enter a feed url 99 | - [**feeds React Sample**](https://github.com/playhtml/feeds/tree/master/react) - simple feeds news reader sample w/ React and JSON feed 100 | 101 | ## Facebook & Co 102 | 103 | _Let's build the next thousands facebooks & co news feeds and readers. Join the free & open web and read & share your posts & updates with feeds._ 104 | 105 | - [**Fuck Facebook**](https://daringfireball.net/2017/06/fuck_facebook) by John Gruber, June 2017 106 | - [**Why I can't/won't point to Facebook blog posts**](http://scripting.com/2017/05/31.html#a110526) by Dave Winer, May 2017 107 | 108 | 109 | 110 | ## Open Web & Friends 111 | 112 | _What's the open web? Why care about the future of online news & publishing?_ 113 | 114 | > Seriously guys, nobody gives a shit about the open web. Only your clique. 
115 | > 116 | > -- [Joe Hewitt, June 2017](https://twitter.com/joehewitt/status/870363197580038144) 117 | 118 | - [**Introducing AltPlatform & our manifesto for the Open Web**](http://altplatform.org/2017/05/30/open-web-manifesto/) by Richard MacManus, May 2017 119 | 120 | 121 | ## Meta 122 | 123 | **License** 124 | 125 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png) 126 | 127 | The awesome list is dedicated to the public domain. Use it as you please with no restrictions whatsoever. 128 | 129 | **Questions? Comments?** 130 | 131 | Post them to the [wwwmake forum](http://groups.google.com/group/wwwmake). Thanks! 132 | -------------------------------------------------------------------------------- /awesome-feeds/TIMELINE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Awesome Feeds > History > Timeline 4 | 5 | 6 | ## 2017 7 | 8 | **June** Feed.TXT introduced. 9 | 10 | RSS-in-JS (re)launched by Dave Winer. 11 | 12 | 13 | **May** JSON Feed introduced by Manton Reece and Brent Simmons. 14 | 15 | 16 | 17 | ## 2004 18 | 19 | ## 2003 20 | 21 | ## 2002 22 | 23 | RSS 3.0 introduced by Aaron Swartz. Removed XML, namespaces, etc. 24 | 25 | 26 | ## 2000 27 | 28 | 29 | ## 1996 30 | 31 | Meta Content Framework (MCF) developed by Ramanathan V. Guha and others in Apple Computer's Advanced Technology Group. 
32 | -------------------------------------------------------------------------------- /feedfilter/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | coverage 6 | InstalledFiles 7 | lib/bundler/man 8 | pkg 9 | rdoc 10 | spec/reports 11 | test/tmp 12 | test/version_tmp 13 | tmp 14 | 15 | # YARD artifacts 16 | .yardoc 17 | _yardoc 18 | doc/ 19 | 20 | 21 | -------------------------------------------------------------------------------- /feedfilter/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ### 0.0.1 / 2015-01-08 3 | 4 | * Everything is new. First release 5 | 6 | -------------------------------------------------------------------------------- /feedfilter/Manifest.txt: -------------------------------------------------------------------------------- 1 | CHANGELOG.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | config/feedburner.txt 6 | config/feedflare.txt 7 | lib/feedfilter.rb 8 | lib/feedfilter/ads.rb 9 | lib/feedfilter/includes.rb 10 | lib/feedfilter/version.rb 11 | test/helper.rb 12 | test/test_ads.rb 13 | test/test_ads_all.rb 14 | test/test_includes.rb 15 | -------------------------------------------------------------------------------- /feedfilter/README.md: -------------------------------------------------------------------------------- 1 | # feedfilter gem - feed filter and rules for easy (re)use 2 | 3 | * home :: [github.com/feedparser/feedfilter](https://github.com/feedparser/feedfilter) 4 | * bugs :: [github.com/feedparser/feedfilter/issues](https://github.com/feedparser/feedfilter/issues) 5 | * gem :: [rubygems.org/gems/feedfilter](https://rubygems.org/gems/feedfilter) 6 | * rdoc :: [rubydoc.info/gems/feedfilter](http://rubydoc.info/gems/feedfilter) 7 | * forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake) 8 | 9 | 10 | ## Usage 11 | 12 | 13 | ### `strip_ads` (in `AdsFilter` module) 
14 | 15 | ``` 16 | require 'feedfilter' 17 | 18 | include FeedFilter::AdsFilter # lets us use strip_ads 19 | 20 | 21 | before_snippet =< 23 | 24 | 25 | 26 | 27 | EOS 28 | 29 | 30 | snippet = strip_ads( before_snippet ) 31 | 32 | puts snippet 33 | ``` 34 | 35 | 36 | ### Use Text Patterns (Regex) for Filters 37 | 38 | Ads Example: 39 | 40 | ``` 41 | FEEDFLARE_ADS = %r{ 42 | ]*? 43 | class=("|')feedflare\1 44 | [^>]*?> 45 | .*? 46 | <\/div> 47 | }mix 48 | 49 | FEEDBURNER_BUGS = %r{ 50 | ]*? 51 | src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1 52 | .*?> 53 | }mix 54 | 55 | ... 56 | ``` 57 | 58 | or as one-liners (if you prefer) 59 | 60 | ``` 61 | FEEDFLARE_ADS = %r{]*?class=("|')feedflare\1[^>]*?>.*?<\/div>}mi 62 | FEEDBURNER_BUGS = %r{]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>}mi 63 | ... 64 | ``` 65 | 66 | 67 | ## License 68 | 69 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png) 70 | 71 | The `feedfilter` scripts are dedicated to the public domain. 72 | Use it as you please with no restrictions whatsoever. 73 | 74 | ## Questions? Comments? 75 | 76 | Send them along to the [wwwmake Forum/Mailing List](http://groups.google.com/group/wwwmake). 77 | Thanks! 
78 | -------------------------------------------------------------------------------- /feedfilter/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/feedfilter/version.rb' 3 | 4 | Hoe.spec 'feedfilter' do 5 | 6 | self.version = FeedFilter::VERSION 7 | 8 | self.summary = "feedfilter - feed filter and rules for easy (re)use" 9 | self.description = summary 10 | 11 | self.urls = ['https://github.com/feedreader/feed.filter'] 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'feedreader@googlegroups.com' 15 | 16 | # switch extension to .markdown for gihub formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'HISTORY.md' 19 | 20 | self.extra_deps = [ 21 | ['textutils', '>=1.0.1'], 22 | ] 23 | 24 | self.licenses = ['Public Domain'] 25 | 26 | self.spec_extras = { 27 | required_ruby_version: '>= 1.9.2' 28 | } 29 | 30 | end 31 | -------------------------------------------------------------------------------- /feedfilter/config/feedburner.txt: -------------------------------------------------------------------------------- 1 | #################################### 2 | # feedburner text pattern (regex) 3 | # 4 | # pattern (regex) 5 | # --- 6 | # test1 7 | # --- 8 | # test2 9 | # --- 10 | # etc. 11 | 12 | 13 | ]*? 14 | src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1 15 | .*?> 16 | 17 | --- 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /feedfilter/config/feedflare.txt: -------------------------------------------------------------------------------- 1 | ################################### 2 | # feedflare text pattern (regex) 3 | 4 | ]*? 5 | class=("|')feedflare\1 6 | [^>]*?> 7 | .*? 8 | <\/div> 9 | 10 | --- 11 | 12 |
13 | 14 | 15 | 16 |
17 | 18 | -------------------------------------------------------------------------------- /feedfilter/lib/feedfilter.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | # core and stdlibs 5 | 6 | 7 | # 3rd party gems/libs 8 | 9 | require 'textutils' 10 | 11 | # our own code 12 | 13 | require 'feedfilter/version' # let it always go first 14 | require 'feedfilter/ads' 15 | require 'feedfilter/includes' 16 | 17 | 18 | # say hello 19 | puts FeedFilter.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG) 20 | 21 | -------------------------------------------------------------------------------- /feedfilter/lib/feedfilter/ads.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | module FeedFilter 5 | 6 | 7 | class AdsFilters 8 | 9 | include LogUtils::Logging 10 | 11 | def initialize 12 | @filters=[] 13 | 14 | names=[ 15 | 'feedburner', 16 | 'feedflare' 17 | ] 18 | 19 | names.each do |name| 20 | logger.debug " add ads filter #{name}" 21 | 22 | b = BlockReader.from_file( "#{FeedFilter.root}/config/#{name}.txt").read 23 | ## Note: replace newline and space in string for regex (w/o spaces) 24 | ## Note: add multiline option and ignore case 25 | regexp = Regexp.new( b[0].gsub( /[\n ]/, '' ), Regexp::MULTILINE|Regexp::IGNORECASE ) 26 | @filters << [name, regexp] 27 | end 28 | end 29 | 30 | def filter( text ) 31 | @filters.each do |f| 32 | name = f[0] 33 | pattern = f[1] 34 | 35 | text = text.gsub( pattern ) do |m| 36 | # Note: m - match is just a regular string 37 | ## double check if it's true also if regex contains capture groups ??? 
38 | puts "strip #{name}:" 39 | pp m 40 | '' 41 | end 42 | end # each filter 43 | text 44 | end # filter 45 | 46 | end # AdsFilters 47 | 48 | 49 | def self.strip_ads( text ) 50 | @@ads_filters ||= FeedFilter::AdsFilters.new 51 | @@ads_filters.filter( text ) 52 | end 53 | 54 | 55 | module AdsFilter 56 | def strip_ads( text ) 57 | FeedFilter.strip_ads( text ) 58 | end 59 | end # module AdsFilter 60 | 61 | end # module FeedFilter 62 | 63 | -------------------------------------------------------------------------------- /feedfilter/lib/feedfilter/includes.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | module FeedFilter 5 | 6 | 7 | class IncludeFilters 8 | 9 | include LogUtils::Logging 10 | 11 | def initialize( includes ) 12 | @includes = includes 13 | 14 | ## split terms (allow comma,pipe) - do NOT use space; allows e.g. terms such as github pages 15 | @terms = includes.split( /\s*[,|]\s*/ ) 16 | ## remove leading and trailing white spaces - check - still required when using \s* ?? 17 | @terms = @terms.map { |term| term.strip } 18 | end 19 | 20 | 21 | def match_item?( item ) 22 | match_terms?( item.title ) || 23 | match_terms?( item.summary ) || 24 | match_terms?( item.content ) 25 | end 26 | 27 | private 28 | 29 | def match_terms?( text ) ### make helper method private - why? why not?? 30 | return false if text.nil? || text.empty? ## allow/guard against nil and empty string (use blank?) 
31 | 32 | @terms.each do |term| 33 | if /#{term}/i =~ text ## Note: lets ignore case (use i regex option) 34 | return true 35 | end 36 | end 37 | 38 | false # no term match found 39 | end 40 | 41 | end # class IncludeFilters 42 | 43 | end # module FeedFilter 44 | -------------------------------------------------------------------------------- /feedfilter/lib/feedfilter/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedFilter 4 | 5 | MAJOR = 1 6 | MINOR = 1 7 | PATCH = 1 8 | VERSION = [MAJOR,MINOR,PATCH].join('.') 9 | 10 | def self.version 11 | VERSION 12 | end 13 | 14 | def self.banner 15 | "feedfilter/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]" 16 | end 17 | 18 | def self.root 19 | "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}" 20 | end 21 | 22 | end # module FeedFilter 23 | 24 | -------------------------------------------------------------------------------- /feedfilter/test/helper.rb: -------------------------------------------------------------------------------- 1 | ## $:.unshift(File.dirname(__FILE__)) 2 | 3 | 4 | ## minitest setup 5 | 6 | require 'minitest/autorun' 7 | 8 | 9 | ## our own code 10 | 11 | require 'feedfilter' 12 | 13 | LogUtils::Logger.root.level = :debug 14 | 15 | -------------------------------------------------------------------------------- /feedfilter/test/test_ads.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_ads.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | class TestAds < MiniTest::Test 11 | 12 | include FeedFilter::AdsFilter 13 | 14 | 15 | def test_feedflare_ads 16 | text =< 18 | 19 | 20 | 21 | 22 | EOS 23 | text = strip_ads( text ).strip 24 | 25 | assert_equal '', text 26 | end 27 | 28 | 29 | def test_feedburner_bugs 30 | text =< 32 | EOS 33 | text = strip_ads( 
text ).strip 34 | 35 | assert_equal '', text 36 | end 37 | 38 | end # class TestAds 39 | -------------------------------------------------------------------------------- /feedfilter/test/test_ads_all.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_ads_all.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | class TestAdsAll < MiniTest::Test 11 | 12 | def test_all 13 | names=[ 14 | 'feedburner', 15 | 'feedflare' 16 | ] 17 | 18 | names.each do |name| 19 | b = BlockReader.from_file( "#{FeedFilter.root}/config/#{name}.txt").read 20 | ## Note: replace newline and space in string for regex (w/o spaces) 21 | ## Note: add multiline option and ignore case 22 | regexp = Regexp.new( b[0].gsub( /[\n ]/, '' ), Regexp::MULTILINE|Regexp::IGNORECASE ) 23 | test1 = b[1] 24 | 25 | assert_equal '', test1.gsub( regexp, '' ).strip 26 | end 27 | end 28 | 29 | end # class TestAdsAll 30 | -------------------------------------------------------------------------------- /feedfilter/test/test_includes.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_includes.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | TestItem = Struct.new( :title, :summary, :content ) 11 | 12 | class TestIncludes < MiniTest::Test 13 | 14 | def test_item 15 | includesFilter = FeedFilter::IncludeFilters.new( 'github pages|jekyll' ) 16 | 17 | item1 = TestItem.new 18 | item1.title = 'title' 19 | item1.summary = 'summary' 20 | item1.content = 'content' 21 | 22 | item2 = TestItem.new 23 | item2.title = 'title' 24 | item2.summary = 'summary' 25 | item2.content = 'bla bla JEKYLL bla bla' 26 | 27 | assert false == includesFilter.match_item?( item1 ) 28 | assert true == includesFilter.match_item?( item2 ) 29 | end 30 | 31 | end # class TestIncludes 32 | 33 | 
-------------------------------------------------------------------------------- /feedfinder/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | coverage 6 | InstalledFiles 7 | lib/bundler/man 8 | pkg 9 | rdoc 10 | spec/reports 11 | test/tmp 12 | test/version_tmp 13 | tmp 14 | 15 | # YARD artifacts 16 | .yardoc 17 | _yardoc 18 | doc/ 19 | 20 | 21 | -------------------------------------------------------------------------------- /feedfinder/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ### 0.0.1 / 2017-06-05 3 | 4 | * Everything is new. First release 5 | -------------------------------------------------------------------------------- /feedfinder/Manifest.txt: -------------------------------------------------------------------------------- 1 | HISTORY.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | lib/feedfinder.rb 6 | lib/feedfinder/version.rb 7 | -------------------------------------------------------------------------------- /feedfinder/README.md: -------------------------------------------------------------------------------- 1 | # feedfinder gem - web feed finder and discovery (RSS, Atom, JSON Feed, etc.) 2 | 3 | * home :: [github.com/feedparser/feedfinder](https://github.com/feedparser/feedfinder) 4 | * bugs :: [github.com/feedparser/feedfinder/issues](https://github.com/feedparser/feedfinder/issues) 5 | * gem :: [rubygems.org/gems/feedfinder](https://rubygems.org/gems/feedfinder) 6 | * rdoc :: [rubydoc.info/gems/feedfinder](http://rubydoc.info/gems/feedfinder) 7 | * forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake) 8 | 9 | 10 | ## Usage 11 | 12 | To be done. 13 | 14 | 15 | ## License 16 | 17 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png) 18 | 19 | The `feedfinder` scripts are dedicated to the public domain. 
20 | Use it as you please with no restrictions whatsoever. 21 | 22 | ## Questions? Comments? 23 | 24 | Send them along to the [wwwmake Forum/Mailing List](http://groups.google.com/group/wwwmake). 25 | Thanks! 26 | -------------------------------------------------------------------------------- /feedfinder/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/feedfinder/version.rb' 3 | 4 | Hoe.spec 'feedfinder' do 5 | 6 | self.version = FeedFinder::VERSION 7 | 8 | self.summary = "feedfinder - web feed finder and discovery (RSS, Atom, JSON Feed, etc.)" 9 | self.description = summary 10 | 11 | self.urls = ['https://github.com/feedparser/feedfinder'] 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'wwwmake@googlegroups.com' 15 | 16 | # switch extension to .markdown for github formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'HISTORY.md' 19 | 20 | self.extra_deps = [ 21 | ['textutils', '>=1.0.1'], 22 | ] 23 | 24 | self.licenses = ['Public Domain'] 25 | 26 | self.spec_extras = { 27 | required_ruby_version: '>= 1.9.2' 28 | } 29 | 30 | end 31 | -------------------------------------------------------------------------------- /feedfinder/lib/feedfinder.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | # core and stdlibs 5 | 6 | 7 | # 3rd party gems/libs 8 | 9 | require 'textutils' 10 | 11 | # our own code 12 | 13 | require 'feedfinder/version' # let it always go first 14 | 15 | 16 | # say hello 17 | puts FeedFinder.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG) 18 | -------------------------------------------------------------------------------- /feedfinder/lib/feedfinder/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedFinder 4 | 5 | MAJOR = 0 6 | MINOR = 2 7 | PATCH = 0 8 | VERSION =
[MAJOR,MINOR,PATCH].join('.') 9 | 10 | def self.version 11 | VERSION 12 | end 13 | 14 | def self.banner 15 | "feedfinder/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]" 16 | end 17 | 18 | def self.root 19 | "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}" 20 | end 21 | 22 | end # module FeedFinder 23 | -------------------------------------------------------------------------------- /feedparser/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | coverage 6 | InstalledFiles 7 | lib/bundler/man 8 | pkg 9 | rdoc 10 | spec/reports 11 | test/tmp 12 | test/version_tmp 13 | tmp 14 | 15 | # YARD artifacts 16 | .yardoc 17 | _yardoc 18 | doc/ 19 | -------------------------------------------------------------------------------- /feedparser/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ### 2.2.1 2 | ### 0.1.0 / 2013-09-19 3 | 4 | * Everything is new. First release. 
5 | -------------------------------------------------------------------------------- /feedparser/Manifest.txt: -------------------------------------------------------------------------------- 1 | CHANGELOG.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | lib/feedparser.rb 6 | lib/feedparser/attachment.rb 7 | lib/feedparser/author.rb 8 | lib/feedparser/builder/atom.rb 9 | lib/feedparser/builder/json.rb 10 | lib/feedparser/builder/microformats.rb 11 | lib/feedparser/builder/rss.rb 12 | lib/feedparser/feed.rb 13 | lib/feedparser/generator.rb 14 | lib/feedparser/item.rb 15 | lib/feedparser/parser.rb 16 | lib/feedparser/tag.rb 17 | lib/feedparser/thumbnail.rb 18 | lib/feedparser/version.rb 19 | test/helper.rb 20 | test/media_rss_example.txt 21 | test/test_atom_live.rb 22 | test/test_attachments_live.rb 23 | test/test_dates.rb 24 | test/test_microformats.rb 25 | test/test_rss_live.rb 26 | -------------------------------------------------------------------------------- /feedparser/NOTES.md: -------------------------------------------------------------------------------- 1 | # Notes 2 | 3 | ## Add Attachments Update 4 | 5 | - [ ] add attachments to jsonfeed 6 | - [ ] add support for multiple attachments / media enclosures in atom 7 | 8 | ## Fix head lookahead (in parse) 9 | 10 | ``` 11 | @head = @text[0..100].strip # note: remove leading spaces if present 12 | change to 13 | @text.lstrip[0..100] ## first strip whitespace (or better use lstrip?) avoids all leading blanks in extreme case 14 | # or 15 | @text.lstrip.[0..100] ## more clear? 16 | ``` 17 | 18 | 19 | ## Check SSL Bug? 20 | 21 | ``` 22 | ### returns ssl error e.g. 
23 | ## OpenSSL::SSL::SSLError: SSL_connect SYSCALL returned=5 errno=0 24 | ## state=SSLv2/v3 read server 25 | def test_googlegroup 26 | feed = fetch_and_parse_feed( 'https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15' ) 27 | 28 | assert_equal 'atom', feed.format 29 | assert_equal 'https://groups.google.com/d/forum/beerdb', feed.url 30 | end 31 | ``` 32 | 33 | 34 | ## More ToDos 35 | 36 | - [ ] add published_confirmation (like password_confirmation) for dc:date duplicate if pubDate is (also) present? 37 | - check if dates are the same ?? issue warning if different?? 38 | 39 | - [ ] add "raw" published_text date string to all formats 40 | 41 | - [ ] add related_url for atom; use link rel=related 42 | 43 | - [ ] add published_local, updated_local to atom, rss and json (for feed not just items) 44 | 45 | - [ ] change .rss2 to simple .rss 46 | - rss 2.0 is just a "better" compatible version of the 0.9x series (0.90, 0.91, 0.92) 47 | 48 | - [ ] reorg feeds 49 | - use new feedburner folder - move all feeds "managed" by feedburner to folder 50 | - use a new google folder - why? why not? incl. google forum and blogger feed - why? why not? 51 | - for all remaining use a misc folder - why? why not?? 52 | 53 | - [ ] convert all dates to utc e.g. use DateTime#utc - why? why not? 54 | - example: 2015-01-16 08:33:57 UTC <= rfc822 Fri, 16 Jan 2015 09:33:57 +0100 55 | - or 2017-05-17 15:02:12 UTC <= iso8601 2017-05-17T08:02:12-07:00 56 | - and so on 57 | 58 | - [ ] check intertwingly.atom feed - uses relative urls - how to make absolute ?? 59 | - feed.url: /blog/ 60 | - feed.items[0].url: /blog/2017/04/07/Badges-We-dont-need-no-stinkin-badges 61 | 62 | 63 | - [x] change feed.generator_uri to generator_url (keep uri as alias) 64 | 65 | - [ ] turn generator into a struct (instead of three strings) 66 | - use generator.name, generator.url, generator.version, etc. 67 | - add alias for generator.name == generator.title e.g.
name = title 68 | 69 | 70 | 71 | ## Limitations of Stdlib RSS reader 72 | 73 | ### RSS 2.0 74 | 75 | Cannot read feed_url link using atom:link rel="self" e.g.: 76 | 77 | ``` 78 | 81 | 82 | 85 | 86 | ``` 87 | 88 | see books/nostarch.rss2 and others as examples. 89 | -------------------------------------------------------------------------------- /feedparser/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/feedparser/version.rb' 3 | 4 | Hoe.spec 'feedparser' do 5 | 6 | self.version = FeedParser::VERSION 7 | 8 | self.summary = 'feedparser - web feed parser and normalizer (RSS, Atom, JSON Feed, HTML h-entry, etc.)' 9 | self.description = summary 10 | 11 | self.urls = { home: 'https://github.com/feedparser/feedparser' } 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'gerald.bauer@gmail.com' 15 | 16 | # switch extension to .markdown for github formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'CHANGELOG.md' 19 | 20 | self.extra_deps = [ 21 | ['logutils', '>=0.6.1'], 22 | ['textutils', '>=1.0.0'], 23 | ## ['oga', '>=3.2.0'], note: oga is a "soft" dependency 24 | ] 25 | 26 | ### todo: add fetcher dep for testing (e.g. development only) 27 | 28 | self.licenses = ['Public Domain'] 29 | 30 | self.spec_extras = { 31 | required_ruby_version: '>= 2.2.2' 32 | } 33 | 34 | end 35 | -------------------------------------------------------------------------------- /feedparser/attic/atom_v03.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | ### 5 | # hack: 6 | ## try to patch/convert old obsolete atom v0.3 to v1(-ish) 7 | ## 8 | ## in convert 9 | ## version="0.3" => removed/dropped!
- use ns for version 10 | ## xmlns="http://purl.org/atom/ns#" => xmlns="http://www.w3.org/2005/Atom" 11 | ## 12 | ## 2014-12-31T15:33:00Z => 13 | ## 2014-12-31T13:02:07Z => 14 | ## 15 | ## 16 | ## more changes: 17 | ## author url => author uri 18 | ## generator @url => generator @uri 19 | ## tagline => subtitle 20 | ## copyright => rights 21 | ## 2014-12-31T13:02:07Z => removed/dropped! 22 | ## 23 | ## todo/fix: fix/convert content @type @mode - why?? why not?? 24 | ## 25 | ## content @mode => removed/dropped! 26 | ## @type=text/plain @mode=escaped => @type=text 27 | ## @type=text/html @mode=escaped => @type=html 28 | 29 | 30 | ## see also 31 | ## - rakaz.nl/2005/07/moving-from-atom-03-to-10.html 32 | 33 | 34 | module FeedParser 35 | 36 | class AtomV03Helper 37 | 38 | include LogUtils::Logging 39 | 40 | def match?( xml ) 41 | ## Note: =~ returns nil on no match (and the match index otherwise); convert to boolean e.g. always return true|false 42 | (xml =~ /]+>/ ) do |m| 47 | ## Note: m passed in is just a string w/ the match (NOT a match data object!) 48 | ## puts "match (#{m.class.name}): " 49 | ## pp m 50 | el = m.sub( /version="0\.3"/, '' ) 51 | el = el.sub( /xmlns="http:\/\/purl\.org\/atom\/ns#"/, 'xmlns="http://www.w3.org/2005/Atom"' ) 52 | el 53 | end 54 | 55 | xml = xml.gsub( //, '' ) 56 | xml = xml.gsub( /<\/modified>/, '' ) 57 | 58 | xml = xml.gsub( //, '' ) 59 | xml = xml.gsub( /<\/issued>/, '' ) 60 | xml 61 | end 62 | 63 | end # class AtomV03Helper 64 | 65 | end # module FeedParser 66 | 67 | -------------------------------------------------------------------------------- /feedparser/attic/feed.rb: -------------------------------------------------------------------------------- 1 | module FeedParser 2 | 3 | class Feed 4 | ### attr_accessor :object # not used for now 5 | 6 | attr_accessor :title_type # e.g. text|html|html-escaped (optional) -use - why?? why not?? 7 | attr_accessor :summary_type # e.g. text|html|html-escaped 8 | 9 | def title2?() @title2.nil?
== false; end 10 | attr_accessor :title2 # e.g. subtitle (atom) 11 | attr_accessor :title2_type # e.g. text|html|html-escaped 12 | 13 | def built?() @built.nil? == false; end 14 | attr_accessor :built 15 | 16 | 17 | 18 | attr_accessor :generator_version # e.g. @version (atom) 19 | attr_accessor :generator_url # e.g. @uri (atom) 20 | 21 | ## note: generator_uri is an alias for generator_url 22 | alias :generator_uri :generator_url 23 | alias :generator_uri= :generator_url= 24 | 25 | 26 | 27 | 28 | end # class Feed 29 | 30 | end # module FeedParser 31 | -------------------------------------------------------------------------------- /feedparser/attic/item.rb: -------------------------------------------------------------------------------- 1 | module FeedParser 2 | 3 | class Item 4 | 5 | ## attr_accessor :object # not used for now -- original object (e.g. RSS item or ATOM entry etc.) 6 | 7 | attr_accessor :title_type # optional for now (text|html|html-escaped) - not yet set 8 | 9 | attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set 10 | 11 | attr_accessor :url # todo: rename to link (use alias) ?? 12 | 13 | ## todo: add summary (alias description) ???
14 | 15 | 16 | end # class Item 17 | 18 | end # module FeedParser 19 | 20 | -------------------------------------------------------------------------------- /feedparser/attic/test_atom_from_file.rb: -------------------------------------------------------------------------------- 1 | 2 | class TestAtomFromFile < MiniTest::Test 3 | 4 | def test_googlegroup 5 | feed = parse_feed_from_file( 'googlegroups.atom' ) 6 | 7 | assert_equal 'atom', feed.format 8 | assert_equal 'Google Groups', feed.generator 9 | assert_equal 'https://groups.google.com/d/forum/beerdb', feed.url 10 | end 11 | 12 | def test_googlegroup2 13 | feed = parse_feed_from_file( 'googlegroups2.atom' ) 14 | 15 | assert_equal 'atom', feed.format 16 | assert_equal 'Google Groups (w/ leading n trailing newlines stripped)', feed.generator 17 | assert_equal 'https://groups.google.com/d/forum/beerdb', feed.url 18 | end 19 | 20 | end 21 | 22 | -------------------------------------------------------------------------------- /feedparser/attic/test_atom_v03.rb: -------------------------------------------------------------------------------- 1 | 2 | 3 | class TestAtomV03 < MiniTest::Test 4 | 5 | def test_match 6 | xmlv1 = read_feed_from_file( 'googlegroups.atom' ) 7 | xmlv03 = read_feed_from_file( 'quirksblog.atom.v03' ) 8 | 9 | atomv03helper = FeedUtils::AtomV03Helper.new 10 | 11 | assert_equal false, atomv03helper.match?( xmlv1 ) 12 | assert_equal true, atomv03helper.match?( xmlv03 ) 13 | 14 | xmlv03up = atomv03helper.convert( xmlv03 ) 15 | assert_equal false, atomv03helper.match?( xmlv03up ) 16 | 17 | pp xmlv03up[0..1000] 18 | end 19 | 20 | def test_parse 21 | feed = parse_feed_from_file( 'quirksblog.atom.v03' ) 22 | 23 | pp feed.updated 24 | assert_equal '2014-12-31T15:33:00+00:00', feed.updated.to_s 25 | 26 | pp feed.items[0].updated 27 | assert_equal '2014-12-31T15:33:00+00:00', feed.items[0].updated.to_s 28 | 29 | pp feed.items[1].updated 30 | assert_equal '2014-11-26T12:11:25+00:00', 
feed.items[1].updated.to_s 31 | end 32 | 33 | end 34 | -------------------------------------------------------------------------------- /feedparser/feed-models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rubycocos/feedparser/c541894604acf8a8d09fa9ba10a1954fd2f6876e/feedparser/feed-models.png -------------------------------------------------------------------------------- /feedparser/lib/feedparser.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | # core and stdlibs 5 | 6 | require 'rss' 7 | require 'pp' 8 | require 'time' # note: ruby has a builtin core time class and a stdlib time class pack; require stdlib extensions 9 | require 'date' # note: ruby has a builtin core date class and a stdlib date class pack; require stdlib extensions 10 | require 'json' 11 | 12 | 13 | # 3rd party gems/libs 14 | 15 | require 'logutils' 16 | require 'textutils' 17 | 18 | 19 | # our own code 20 | 21 | require 'feedparser/version' # let it always go first 22 | 23 | require 'feedparser/builder/atom' 24 | require 'feedparser/builder/rss' 25 | require 'feedparser/builder/json' 26 | require 'feedparser/builder/microformats' 27 | 28 | 29 | require 'feedparser/feed' 30 | require 'feedparser/item' 31 | require 'feedparser/author' 32 | require 'feedparser/tag' 33 | require 'feedparser/attachment' 34 | require 'feedparser/thumbnail' 35 | require 'feedparser/generator' 36 | require 'feedparser/parser' 37 | 38 | 39 | 40 | # say hello 41 | puts FeedParser.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG) 42 | -------------------------------------------------------------------------------- /feedparser/lib/feedparser/attachment.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedParser 4 | 5 | class Attachment ## also known as Enclosure 6 | 7 | attr_accessor :url 8 | 
## note: uri is an alias for url 9 | alias :uri :url ## add atom alias for uri - why? why not? 10 | alias :uri= :url= 11 | 12 | attr_accessor :length 13 | attr_accessor :type 14 | 15 | # Elements from the media namespace attachment 16 | attr_accessor :title 17 | attr_accessor :thumbnail 18 | attr_accessor :description 19 | attr_accessor :community 20 | 21 | end # class Attachment 22 | 23 | end # module FeedParser 24 | -------------------------------------------------------------------------------- /feedparser/lib/feedparser/author.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedParser 4 | 5 | class Author 6 | 7 | attr_accessor :name 8 | attr_accessor :url 9 | ## note: uri is an alias for url 10 | alias :uri :url ## add atom alias for uri - why? why not? 11 | alias :uri= :url= 12 | 13 | def email?() @email.nil? == false; end 14 | attr_accessor :email 15 | 16 | def avatar?() @avatar.nil? == false; end 17 | attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url 18 | 19 | 20 | ## todo: add role - why? why not? 21 | ## e.g. add contributor (atom) 22 | ## or managingEditor (rss) or webMaster (rss) - why? why not?? 23 | 24 | attr_accessor :text # note: holds "unparsed" text (content) line from dc:creator or rss:author 25 | alias :line :text # line|text (add str??
too) 26 | 27 | def to_s 28 | ## note: to_s - allows to use just author in templates 29 | ## will by default return name if present or as fallback "unparsed" text line 30 | if @name ## not blank 31 | @name 32 | else 33 | @text 34 | end 35 | end 36 | 37 | end # class Author 38 | 39 | end # module FeedParser 40 | -------------------------------------------------------------------------------- /feedparser/lib/feedparser/builder/json.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedParser 4 | 5 | class JsonFeedBuilder 6 | 7 | include LogUtils::Logging 8 | 9 | 10 | def self.build( hash ) 11 | feed = self.new( hash ) 12 | feed.to_feed 13 | end 14 | 15 | def initialize( hash ) 16 | @feed = build_feed( hash ) 17 | end 18 | 19 | def to_feed 20 | @feed 21 | end 22 | 23 | 24 | 25 | def build_feed( h ) 26 | feed = Feed.new 27 | feed.format = 'json' 28 | 29 | feed.title = h['title'] 30 | feed.url = h['home_page_url'] 31 | feed.feed_url = h['feed_url'] 32 | feed.summary = h['description'] 33 | 34 | 35 | if h['author'] 36 | feed.authors << build_author( h['author'] ) 37 | end 38 | 39 | 40 | h['items'].each do |hash_item| 41 | feed.items << build_item( hash_item ) 42 | end 43 | 44 | feed # return new feed 45 | end # method build_feed_from_json 46 | 47 | 48 | def build_author( h ) 49 | author = Author.new 50 | 51 | author.name = h['name'] 52 | author.url = h['url'] 53 | author.avatar = h['avatar'] 54 | 55 | author 56 | end 57 | 58 | 59 | 60 | def build_item( h ) 61 | item = Item.new # Item.new 62 | 63 | item.guid = h['id'] 64 | item.title = h['title'] 65 | item.url = h['url'] 66 | item.external_url = h['external_url'] 67 | 68 | ## convert date if present (from string to date type) 69 | date_published_str = h['date_published'] 70 | if date_published_str 71 | item.published_local = DateTime.iso8601( date_published_str ) 72 | item.published = item.published_local.utc 73 | end 74 | 75 | date_modified_str = 
h['date_modified'] 76 | if date_modified_str 77 | item.updated_local = DateTime.iso8601( date_modified_str ) 78 | item.updated = item.updated_local.utc 79 | end 80 | 81 | 82 | item.content_html = h['content_html'] 83 | item.content_text = h['content_text'] 84 | item.summary = h['summary'] 85 | 86 | if h['author'] 87 | item.authors << build_author( h['author'] ) 88 | end 89 | 90 | if h['tags'] 91 | h['tags'].each do |json_tag| 92 | item.tags << build_tag( json_tag ) 93 | end 94 | end 95 | 96 | item 97 | end # method build_item 98 | 99 | 100 | def build_tag( json_tag ) 101 | ## pp rss_cat 102 | tag = Tag.new 103 | 104 | tag.name = json_tag 105 | 106 | tag 107 | end # build_tag 108 | 109 | 110 | end # JsonFeedBuilder 111 | end # FeedParser 112 | -------------------------------------------------------------------------------- /feedparser/lib/feedparser/builder/microformats.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedParser 4 | 5 | 6 | class HyFeedBuilder 7 | 8 | include LogUtils::Logging 9 | 10 | 11 | def self.build( hash ) 12 | feed = self.new( hash ) 13 | feed.to_feed 14 | end 15 | 16 | def initialize( hash ) 17 | @feed = build_feed( hash ) 18 | end 19 | 20 | def to_feed 21 | @feed 22 | end 23 | 24 | 25 | def build_feed( h ) 26 | 27 | b = HyBuilder.new( h ) ## convert hash to structs 28 | 29 | ## use first feed - more really possible? 30 | ## fix/todo: handle no feed too!!! 31 | hy = b.feeds[0] 32 | 33 | ## pp hy 34 | 35 | feed = Feed.new 36 | feed.format = 'html' 37 | 38 | ### todo: add 39 | ## - feed.title 40 | ## - feed.url 41 | ## - feed.feed_url 42 | ## - feed.summary 43 | ## - feed.authors 44 | ## etc. 
45 | 46 | hy.entries.each do |entry| 47 | feed.items << build_item( entry ) 48 | end 49 | 50 | feed # return new feed 51 | end # method build_feed 52 | 53 | 54 | def build_author( hy ) 55 | author = Author.new 56 | 57 | author.name = hy.name 58 | 59 | ## todo - add: 60 | ## author.url 61 | 62 | author 63 | end 64 | 65 | 66 | 67 | def build_item( hy ) 68 | item = Item.new # Item.new 69 | 70 | item.title = hy.name 71 | item.url = hy.url 72 | item.published_local = hy.published_local 73 | item.published = hy.published 74 | 75 | item.content_html = hy.content_html 76 | item.content_text = hy.content_text 77 | item.summary = hy.summary 78 | 79 | ## check: how to add an id - auto-generate - why? why not?? 80 | ## item.id = h['id'] 81 | 82 | hy.authors.each do |author| 83 | item.authors << build_author( author ) 84 | end 85 | 86 | item 87 | end # method build_item 88 | 89 | end # class HyFeedBuilder 90 | 91 | 92 | 93 | class HyFeed 94 | attr_accessor :entries 95 | 96 | def initialize 97 | @entries = [] 98 | end 99 | end # class HyFeed 100 | 101 | 102 | class HyEntry 103 | attr_accessor :name 104 | attr_accessor :content 105 | attr_accessor :content_text 106 | attr_accessor :summary 107 | 108 | attr_accessor :published # utc time 109 | attr_accessor :published_local # local time (with timezone/offset) 110 | attr_accessor :url 111 | 112 | attr_accessor :authors # note: allow multiple authors 113 | 114 | # note: title is an alias for name 115 | alias :title :name 116 | alias :title= :name= 117 | 118 | # note: content_html is an alias for content 119 | alias :content_html :content 120 | alias :content_html= :content= 121 | 122 | def initialize 123 | @authors = [] 124 | end 125 | 126 | end ## class HyEntry 127 | 128 | 129 | class HyAuthor 130 | attr_accessor :name 131 | attr_accessor :url 132 | end ## class HyAuthor 133 | 134 | 135 | 136 | 137 | class HyBuilder 138 | 139 | attr_reader :feeds 140 | 141 | def initialize( hash ) 142 | @h = hash 143 | @feeds = [] 144 | build 145 |
146 | pp @feeds 147 | end 148 | 149 | def build 150 | 151 | entries = [] 152 | @h['items'].each_with_index do |item_hash,i| 153 | puts "item #{i+1}:" 154 | pp item_hash 155 | 156 | types = item_hash['type'] 157 | pp types 158 | if types.include?( 'h-feed' ) 159 | @feeds << build_feed( item_hash ) 160 | elsif types.include?( 'h-entry' ) 161 | entries << build_entry( item_hash ) 162 | else 163 | ## unknown type; skip for now 164 | end 165 | end 166 | 167 | ## wrap all "loose" entries in a "dummy" h-entry feed 168 | if entries.any? 169 | feed = HyFeed.new 170 | feed.entries = entries 171 | @feeds << feed 172 | end 173 | 174 | end # method build 175 | 176 | def build_feed( h ) 177 | puts " build_feed" 178 | 179 | feed = HyFeed.new 180 | 181 | h['children'].each_with_index do |item_hash,i| 182 | puts "item #{i+1}:" 183 | pp item_hash 184 | 185 | types = item_hash['type'] 186 | pp types 187 | if types.include?( 'h-entry' ) 188 | feed.entries << build_entry( item_hash ) 189 | else 190 | ## unknown type; skip for now 191 | end 192 | end 193 | 194 | feed 195 | end ## method build_feed 196 | 197 | 198 | def build_entry( h ) 199 | puts " build_entry" 200 | 201 | entry = HyEntry.new 202 | 203 | props = h['properties'] 204 | pp props 205 | 206 | entry.name = props['name'].join( ' ') # check an example with more entries (how to join??) 207 | 208 | if props['summary'] 209 | entry.summary = props['summary'].join( ' ' ) 210 | end 211 | 212 | if props['content'] 213 | ## add up all value attribs in content 214 | entry.content_text = props['content'].map { |h| h[:value] }.join( ' ' ).strip 215 | ## add up all html attribs in content; plus strip leading n trailing whitespaces 216 | entry.content = props['content'].map { |h| h[:html] }.join( ' ' ).strip 217 | end 218 | 219 | 220 | # get first field in array -- check if really ever possible more than one? what does it mean (many dates)??? 221 | ## todo: check if datetime is always utc (or local possible?) 
222 | url_str = props.fetch( 'url', [] )[0] 223 | if url_str 224 | entry.url = url_str 225 | end 226 | 227 | # get first field in array -- check if really ever possible more than one? what does it mean (many dates)??? 228 | ## todo: check if datetime is always utc (or local possible?) 229 | published_str = props.fetch( 'published', [] )[0] 230 | pp published_str 231 | if published_str 232 | ## entry.published = DateTime.iso8601( published_str ) 233 | entry.published_local = DateTime.parse( published_str ) 234 | entry.published = entry.published_local.utc 235 | end 236 | 237 | ## check for authors 238 | if props['author'] 239 | props['author'].each do |author_hash| 240 | pp author_hash 241 | entry.authors << build_author( author_hash ) 242 | end 243 | end 244 | 245 | entry 246 | end # method build_entry 247 | 248 | def build_author( h ) 249 | puts " build_author" 250 | 251 | author = HyAuthor.new 252 | 253 | author.name = h['value'] 254 | 255 | ## todo/fix: -- note: for now skip possible embedded h-card 256 | author 257 | end # method build_author 258 | 259 | 260 | end # class HyBuilder 261 | 262 | 263 | 264 | end # module FeedParser 265 | -------------------------------------------------------------------------------- /feedparser/lib/feedparser/feed.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedParser 4 | 5 | class Feed 6 | 7 | attr_accessor :format # e.g. atom|rss 2.0|json etc. 8 | attr_accessor :title 9 | attr_accessor :url ## todo - add alias site_url/home_page_url/page_url - why? why not?? 10 | attr_accessor :feed_url 11 | 12 | 13 | attr_accessor :items 14 | 15 | attr_accessor :authors 16 | def authors?() @authors && @authors.size > 0; end 17 | ## note: author? is an alias for authors? 18 | alias :author? :authors? 19 | 20 | ## add author shortcut e.g. equals authors[0] - for now only read only 21 | ## fix: also add author= why? why not??? 
22 | def author() @authors[0]; end 23 | 24 | 25 | attr_accessor :tags 26 | def tags?() @tags && @tags.size > 0; end 27 | 28 | ## add alias category for tags (remove - why? why not?) 29 | alias :categories :tags 30 | 31 | 32 | def summary?() @summary.nil? == false; end 33 | attr_accessor :summary # e.g. description (rss)|subtitle (atom) 34 | 35 | ## add description as alias for summary (remove - why? why not?) 36 | alias :description :summary 37 | alias :description= :summary= 38 | alias :description? :summary? 39 | 40 | 41 | ## 42 | ## todo/check/fix: 43 | ## use a extra field for atom subtitle 44 | ## - subtitle not the same as summary - why? why not? 45 | ## - assume summary == description == abstract but 46 | ## keep subtitle separate e.g. assume subtitle is just a (simple) single line 47 | ## 48 | ## for now alias summary to subtitle 49 | alias :subtitle :summary 50 | alias :subtitle= :summary= 51 | alias :subtitle? :summary? 52 | 53 | 54 | 55 | def updated?() @updated.nil? == false; end 56 | attr_accessor :updated # e.g. lastBuildDate (rss)|updated (atom) -- always (converted) to utc 57 | attr_accessor :updated_local # "unparsed" local datetime as in feed (NOT converted to utc) 58 | 59 | attr_accessor :updated_text # string version of date 60 | alias :updated_line :updated_text # text|line - convention for "unparsed" 1:1 from feed; add str(too ??) 61 | 62 | def published?() @published.nil? == false; end 63 | attr_accessor :published # e.g. pubDate (rss)\n/a (atom) -- note: published is basically an alias for created 64 | attr_accessor :published_local # "unparsed" local datetime as in feed (NOT converted to utc) 65 | 66 | attr_accessor :published_text # string version of date 67 | alias :published_line :published_text # text|line - convention for "unparsed" 1:1 from feed; add str(too ??) 
68 | 69 | 70 | attr_accessor :generator 71 | 72 | 73 | ## fix: 74 | # add pretty printer/inspect (exclude object) 75 | 76 | 77 | def initialize 78 | ## note: make items, authors, tags empty arrays on startup (e.g. not nil) 79 | @items = [] 80 | @authors = [] 81 | @tags = [] 82 | 83 | @generator = Generator.new 84 | end 85 | 86 | end # class Feed 87 | 88 | end # module FeedParser 89 | -------------------------------------------------------------------------------- /feedparser/lib/feedparser/generator.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedParser 4 | 5 | class Generator 6 | 7 | attr_accessor :name 8 | ## note: title is an alias for name 9 | alias :title :name 10 | alias :title= :name= 11 | 12 | attr_accessor :version 13 | 14 | attr_accessor :url 15 | ## note: uri is an alias for url 16 | alias :uri :url ## add atom alias for uri - why? why not? 17 | alias :uri= :url= 18 | 19 | 20 | attr_accessor :text # note: holds "unparsed" text (content) line from rss:generator 21 | alias :line :text # line|text (add str??
too) 22 | 23 | 24 | def to_s 25 | ## note: to_s - allows to use just generator in templates 26 | ## will by default return name if present or as fallback "unparsed" text line 27 | if @name ## not blank 28 | @name 29 | else 30 | @text 31 | end 32 | end 33 | 34 | end # class Generator 35 | 36 | end # module FeedParser 37 | -------------------------------------------------------------------------------- /feedparser/lib/feedparser/item.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedParser 4 | 5 | class Item 6 | 7 | attr_accessor :title 8 | attr_accessor :url 9 | attr_accessor :external_url 10 | 11 | # note: related_url is an alias for external_url 12 | alias :related_url :external_url ## link rel=related used in atom 13 | alias :related_url= :external_url= 14 | 15 | 16 | ## note: only content/content_html should use html; 17 | ## all others (e.g. title/summary/content_text) should be plain (vanilla) text 18 | 19 | 20 | def content?() @content.nil? == false; end 21 | attr_accessor :content 22 | 23 | ## note: content_html is an alias for content 24 | ## will hold type html/xhtml/html-escaped - check if always converted to string by parser ?? 25 | alias :content_html :content 26 | alias :content_html= :content= 27 | alias :content_html? :content? 28 | 29 | 30 | def content_text?() @content_text.nil? == false; end 31 | attr_accessor :content_text 32 | 33 | 34 | 35 | def summary?() @summary.nil? == false; end 36 | attr_accessor :summary 37 | 38 | ## add description as alias for summary (remove - why? why not?) 39 | alias :description :summary 40 | alias :description= :summary= 41 | alias :description? :summary? 42 | 43 | 44 | 45 | def updated?() @updated.nil?
== false; end 46 | attr_accessor :updated # pubDate (RSS)|updated (Atom) 47 | attr_accessor :updated_local # "unparsed" local datetime as in feed (NOT converted to utc) 48 | 49 | attr_accessor :updated_text # string version of date 50 | alias :updated_line :updated_text # text|line - convention for "unparsed" 1:1 from feed; add str(too ??) 51 | 52 | 53 | def published?() @published.nil? == false; end 54 | attr_accessor :published # note: published is basically an alias for created 55 | attr_accessor :published_local # "unparsed" local datetime as in feed (NOT converted to utc) 56 | 57 | attr_accessor :published_text # string version of date 58 | alias :published_line :published_text # text|line - convention for "unparsed" 1:1 from feed; add str(too ??) 59 | 60 | 61 | attr_accessor :id 62 | 63 | ## note: guid is an alias for id 64 | alias :guid :id 65 | alias :guid= :id= 66 | 67 | attr_accessor :authors 68 | ## add author shortcut e.g. equals authors[0] - for now only read only 69 | ## fix: also add author= why? why not??? 70 | def authors?() @authors && @authors.size > 0; end 71 | ## note: author? is an alias for authors? 72 | alias :author? :authors? 73 | 74 | ## add author shortcut e.g. equals authors[0] - for now only read only 75 | ## fix: also add author= why? why not??? 76 | def author() @authors[0]; end 77 | 78 | 79 | attr_accessor :tags 80 | def tags?() @tags && @tags.size > 0; end 81 | 82 | alias :categories :tags # for now allow categories alias for tags - remove (why? why not?) 83 | 84 | 85 | # add attachments/media enclosures (url, length and type) 86 | # note: lets support more than one (it's an array) 87 | attr_accessor :attachments 88 | 89 | def attachment() @attachments[0]; end 90 | def attachments?() @attachments && @attachments.size > 0; end 91 | alias :attachment? :attachments? 92 | 93 | alias :enclosures :attachments 94 | alias :enclosure :attachment 95 | alias :enclosures? :attachments? 96 | alias :enclosure? :attachments? 
97 | 98 | 99 | def initialize 100 | ## note: make authors, tags empty arrays on startup (e.g. not nil) 101 | @authors = [] 102 | @tags = [] 103 | @attachments = [] 104 | end 105 | 106 | end # class Item 107 | 108 | end # module FeedParser 109 | -------------------------------------------------------------------------------- /feedparser/lib/feedparser/parser.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module FeedParser 4 | 5 | 6 | class Parser 7 | 8 | include LogUtils::Logging 9 | 10 | 11 | ### convenience class/factory method 12 | def self.parse( text, opts={} ) 13 | self.new( text ).parse 14 | end 15 | 16 | ### Note: lets keep/use same API as RSS::Parser for now 17 | def initialize( text ) 18 | @text = text 19 | @head = @text[0..100].strip # note: remove leading spaces if present 20 | end 21 | 22 | 23 | 24 | #### note: 25 | # make format checks callable from outside (that is, use builtin helper methods) 26 | 27 | def is_xml? 28 | ## check if starts with known xml prologs 29 | @head.start_with?( ' do NOT validate (otherwise atom 0.3 fails) 20 | # 2) true => ignore unknown elements - use true - why? why not??
21 | 22 | # Note: default is true,true - that is, do validate, and do ignore unknown elements 23 | 24 | 25 | ############ 26 | # format version mappings: 27 | # RSS::Atom::Feed => atom 28 | 29 | 30 | ########### 31 | # Note: RSS::Atom::Feed 32 | # - has no feed_version => assumes always 1.0 for now (no other atom format exists) 33 | 34 | 35 | 36 | ################## 37 | # RSS::Rss 38 | # - see http://www.ruby-doc.org/stdlib-2.0.0/libdoc/rss/rdoc/RSS/Rss.html 39 | 40 | puts "feed.class: #{feed.class.name}" 41 | 42 | 43 | ## puts "dump feed:" 44 | ## pp feed 45 | 46 | # puts "dump feed.channel:" 47 | # puts feed.channel.inspect 48 | 49 | puts "dump feed.title (#{feed.title.class.name}):" 50 | ## pp feed.title 51 | 52 | puts "dump feed.id (#{feed.id.class.name}):" 53 | ## pp feed.id 54 | 55 | puts "dump feed.updated (#{feed.updated.class.name}):" 56 | ## pp feed.updated 57 | 58 | =begin 59 | @link= 60 | [#, 69 | @rel="self", 70 | @title=nil, 71 | @type="application/atom+xml">, 72 | #, 81 | @rel="alternate", 82 | @title=nil, 83 | @type="text">], 84 | =end 85 | 86 | # check links (assume it's any array - always) 87 | puts "dump feed.link (#{feed.link.class.name}):" 88 | puts " link rel=#{feed.link.rel} type=#{feed.link.type} href=#{feed.link.href}" 89 | 90 | ## Note: use links (with s - plural to get back array) 91 | puts "dump feed.links (#{feed.links.class.name}):" 92 | 93 | feed.links.each_with_index do |link,i| 94 | puts "[#{i}] link rel=#{link.rel} type=#{link.type} href=#{link.href}" 95 | end 96 | 97 | 98 | ## todo/check: atom feed can include published element (optionally)?
99 | 100 | if feed.respond_to?( :published ) 101 | puts "dump feed.published (#{feed.published.class.name}):" 102 | ## pp feed.published 103 | end 104 | 105 | 106 | pp feed 107 | -------------------------------------------------------------------------------- /feedparser/sandbox/testpp.rb: -------------------------------------------------------------------------------- 1 | 2 | require 'logutils' 3 | require 'textutils' 4 | require 'fetcher' 5 | 6 | 7 | ## our own code 8 | 9 | require 'feedparser' 10 | 11 | 12 | ## LogUtils::Logger.root.level = :debug 13 | 14 | feed_url = "http://openfootball.github.io/feed.json" 15 | 16 | text = Fetcher.read( feed_url ) 17 | feed = FeedParser::Parser.parse( text ) 18 | 19 | pp feed 20 | 21 | puts feed.title 22 | -------------------------------------------------------------------------------- /feedparser/sandbox/testrss.rb: -------------------------------------------------------------------------------- 1 | 2 | # stdlibs 3 | require 'rss' 4 | require 'pp' 5 | 6 | # 3rd party libs/gems 7 | require 'fetcher' 8 | 9 | ## feed_url = 'http://feeds.feedburner.com/Rubyflow?format=xml' # rss 2.0 10 | 11 | feed_url = 'https://www.nostarch.com/feeds/comingsoon.xml' # rss 2.0 12 | 13 | 14 | xml = Fetcher.read( feed_url ) 15 | 16 | feed = RSS::Parser.parse( xml, false, false ) # use most "liberal" version 17 | # 1) false => do NOT validate 18 | # 2) false => ignore unknown elements - use true - why? why not?? 
19 | 20 | 21 | ############ 22 | # format version mappings: 23 | # RSS::Rss #rss_version==2.0 => rss 2.0 24 | # #rss_version== 25 | 26 | 27 | ################## 28 | # RSS::Rss 29 | # - see http://www.ruby-doc.org/stdlib-2.0.0/libdoc/rss/rdoc/RSS/Rss.html 30 | 31 | puts "feed.class: #{feed.class.name}" 32 | 33 | puts "feed.rss_version: #{feed.rss_version}" 34 | puts "feed.feed_version: #{feed.feed_version}" 35 | 36 | puts "feed.image:" 37 | pp feed.image 38 | 39 | 40 | pp feed 41 | -------------------------------------------------------------------------------- /feedparser/test/helper.rb: -------------------------------------------------------------------------------- 1 | ## $:.unshift(File.dirname(__FILE__)) 2 | 3 | 4 | ## minitest setup 5 | 6 | require 'minitest/autorun' 7 | 8 | require 'logutils' 9 | require 'textutils' 10 | require 'fetcher' 11 | 12 | 13 | ## our own code 14 | require 'feedparser' 15 | 16 | 17 | 18 | LogUtils::Logger.root.level = :debug 19 | 20 | 21 | def fetch_and_parse_feed( url ) 22 | text = Fetcher.read( url ) 23 | 24 | FeedParser::Parser.parse( text ) 25 | end 26 | -------------------------------------------------------------------------------- /feedparser/test/media_rss_example.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Calm Meditation 5 | http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com 6 | en-us 7 | Mon, 02 Apr 2018 16:19:56 -0700 8 | Mon, 02 Apr 2018 16:19:56 -0700 9 | tomjoht@gmail.com (Tom Johnson) 10 | Contains short videos capturing still scenes from nature with a music background, intended for calming or meditation purposes. When you're stressed out or upset, watch a few videos. As your mind focuses on the small details, let your worries and frustrations float away. The purpose is not to entertain or to distract, but to help calm, soothe, and surface your inner quiet. The videos contain scenes from the San Tomas Aquinas trail in Santa Clara, California. 
11 | 12 | http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com 13 | Calm Meditation 14 | http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/calmmeditationlogo_small.png 15 | Contains short videos capturing still scenes from nature with a music background, intended for calming or meditation purposes. When you're stressed out or upset, watch a few videos. As your mind focuses on the small details, let your worries and frustrations float away. The purpose is not to entertain or to distract, but to help calm, soothe, and surface your inner quiet. The videos contain scenes from the San Tomas Aquinas trail in Santa Clara, California. 16 | 114 17 | 114 18 | 19 | 20 | 21 | Shade 22 | Mon, 23 Oct 2017 00:00:00 -0700 23 | http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/shade/ 24 | Quiet the mind, and the soul will speak. - Ma Jaya Sati Bhagavati 25 | http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/shade/ 26 | All 27 | Trail 28 | 29 | Shade 30 | Quiet the mind, and the soul will speak. - Ma Jaya Sati Bhagavati 31 | 32 | Tom Johnson 33 | 34 | 35 | 36 | 37 | Spectators 38 | Thu, 12 Oct 2017 00:00:00 -0700 39 | http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/spectators/ 40 | "Your worst enemy cannot harm you as much as your own thoughts, unguarded." – Buddha 41 | http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/spectators/ 42 | All 43 | Grass 44 | 45 | Spectators 46 | "Your worst enemy cannot harm you as much as your own thoughts, unguarded." 
– Buddha 47 | 48 | Tom Johnson 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /feedparser/test/test_atom_live.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_atom_live.rb 4 | # or better 5 | # rake test 6 | 7 | 8 | 9 | require 'helper' 10 | 11 | class TestAtomLive < Minitest::Test 12 | 13 | def test_rubyonrails 14 | feed = fetch_and_parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' ) 15 | 16 | assert_equal 'atom', feed.format 17 | assert_equal 'https://weblog.rubyonrails.org/', feed.url 18 | ## note was (2020/1): 'http://weblog.rubyonrails.org/', feed.url 19 | end 20 | 21 | 22 | def test_railstutorial 23 | feed = fetch_and_parse_feed( 'http://feeds.feedburner.com/railstutorial?format=xml' ) 24 | 25 | assert_equal 'atom', feed.format 26 | assert_equal 'https://news.learnenough.com/', feed.url 27 | ## note was (2020/1): assert_equal 'http://news.learnenough.com/', feed.url 28 | ## note was (2017/5): assert_equal 'http://news.railstutorial.org/', feed.url 29 | end 30 | 31 | 32 | =begin 33 | ### returns ssl error e.g. 
34 | ## OpenSSL::SSL::SSLError: SSL_connect SYSCALL returned=5 errno=0 state=SSLv2/v3 read server 35 | def test_googlegroup 36 | feed = fetch_and_parse_feed( 'https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15' ) 37 | 38 | assert_equal 'atom', feed.format 39 | assert_equal 'https://groups.google.com/d/forum/beerdb', feed.url 40 | end 41 | =end 42 | 43 | 44 | def test_headius 45 | feed = fetch_and_parse_feed( 'http://blog.headius.com/feed.xml' ) 46 | ## note was (2020/1): 'http://blog.headius.com/feeds/posts/default' 47 | 48 | assert_equal 'atom', feed.format 49 | assert_equal 'Jekyll', feed.generator.name 50 | ## note was (2020/1): 'Blogger' 51 | 52 | assert_equal 'Charles Oliver Nutter', feed.title 53 | ## note was (2020/1): 'Headius', feed.title 54 | assert_equal 'Java, Ruby, and JVM guy trying to make sense of it all', feed.summary # aka subtitle in atom 55 | ## note was (2020/1): 'Helping the JVM Into the 21st Century', feed.title 56 | assert_equal 'https://headius.github.io/', feed.url 57 | ## note was (2020/1): 'http://blog.headius.com/' 58 | end 59 | 60 | end 61 | -------------------------------------------------------------------------------- /feedparser/test/test_attachments_live.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_attachments_live.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | ### 11 | ## note: needs to require oga gem (it's not required by default - it's a "soft" dependency) 12 | 13 | require 'oga' 14 | 15 | 16 | 17 | class TestAttachmentsLive < Minitest::Test 18 | 19 | def test_atom_enclose 20 | feed = fetch_and_parse_feed( 'http://www.lse.ac.uk/assets/richmedia/webFeeds/publicLecturesAndEvents_AtomAllMediaTypesLatest100.xml' ) 21 | 22 | assert_equal 'audio/mpeg', feed.items.first.attachment.type 23 | assert_equal 'audio/mpeg', feed.items.first.enclosure.type 24 | 25 | assert_equal true, 
feed.items.first.attachment? 26 | assert_equal true, feed.items.first.enclosure? 27 | end 28 | 29 | def test_atom_media 30 | feed = fetch_and_parse_feed( 'http://www.youtube.com/feeds/videos.xml?channel_id=UCZUT79WUUpZlZ-XMF7l4CFg' ) 31 | assert_equal true, feed.items.first.attachment? 32 | assert feed.items.first.attachments.first.title 33 | assert feed.items.first.attachments.first.url 34 | assert feed.items.first.attachments.first.thumbnail 35 | assert_instance_of FeedParser::Thumbnail, feed.items.first.attachments.first.thumbnail 36 | assert feed.items.first.attachments.first.thumbnail.url 37 | assert_equal 480, feed.items.first.attachments.first.thumbnail.width.to_i 38 | assert_equal 360, feed.items.first.attachments.first.thumbnail.height.to_i 39 | assert feed.items.first.attachments.first.description 40 | end 41 | 42 | def test_rss_media 43 | # tests an example RSS file from https://creator.amazon.com/documentation/ac/mrss.html. Note that unlike the Atom example, it 44 | # does not put everything under media:group 45 | testpath = File.join(File.expand_path(File.dirname(__FILE__)), 'media_rss_example.txt') 46 | feed_rss = File.read( testpath ) 47 | feed = FeedParser::Parser.parse( feed_rss ) 48 | assert_equal true, feed.items.first.attachment?
49 | assert feed.items.first.attachments.first.title 50 | assert feed.items.first.attachments.first.url 51 | assert feed.items.first.attachments.first.thumbnail 52 | assert_instance_of FeedParser::Thumbnail, feed.items.first.attachments.first.thumbnail 53 | assert feed.items.first.attachments.first.thumbnail.url 54 | assert_nil feed.items.first.attachments.first.thumbnail.width 55 | assert_nil feed.items.first.attachments.first.thumbnail.height 56 | assert feed.items.first.attachments.first.description 57 | end 58 | 59 | def test_rss_enclosure 60 | feed = fetch_and_parse_feed( 'http://www.radiofreesatan.com/category/featured/feed/' ) 61 | 62 | assert_equal 'audio/mpeg', feed.items.first.attachment.type 63 | assert_equal 'audio/mpeg', feed.items.first.enclosure.type 64 | 65 | assert_equal true, feed.items.first.attachment? 66 | assert_equal true, feed.items.first.enclosure? 67 | end 68 | 69 | end 70 | -------------------------------------------------------------------------------- /feedparser/test/test_dates.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_dates.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | class TestDates < Minitest::Test 11 | 12 | def test_iso8601 # used by atom, json feed 13 | 14 | recs = [ 15 | [ '2017-05-20T19:23:06Z', DateTime.new(2017, 5,20,19,23, 6) ], # from daringfireball.json 16 | [ '2017-05-20T19:23:08Z', DateTime.new(2017, 5,20,19,23, 8) ], 17 | [ '2017-05-17T08:02:12-07:00', DateTime.new(2017, 5,17, 8, 2,12,'-7') ], # from jsonfeed.json 18 | [ '2017-05-18T21:08:49+00:00', DateTime.new(2017, 5,18,21, 8,49) ], # from byparker.json 19 | [ '2017-05-18T21:08:49.123+00:00', DateTime.new(2017, 5,18,21, 8,49.123) ], ### try with usec e.g. 
49.124 20 | [ '2017-05-17T08:02:12.567-07:00', DateTime.new(2017, 5,17, 8, 2,12.567,'-7') ], 21 | ] 22 | 23 | recs.each do |rec| 24 | d = DateTime.iso8601( rec[0] ) 25 | puts "class: #{d.class.name} - #{d.utc} (#{d.usec}) <= iso8601 #{rec[0]}" 26 | pp d 27 | assert_equal rec[1], d 28 | end 29 | end # test_iso8601 30 | 31 | 32 | def test_rfc822 # used by rss 2.0 33 | 34 | recs = [ 35 | [ 'Sat, 17 Jan 2015 11:57:47 +0000', DateTime.new( 2015, 1,17,11,57,47) ], # from sitepoint.rss2 36 | [ 'Thu, 15 Jan 2015 15:00:56 +0000', DateTime.new( 2015, 1,15,15,00,56) ], 37 | [ 'Fri, 16 Jan 2015 17:33:47 +0100', DateTime.new( 2015, 1,16,17,33,47,'+1') ], # from rubyflow.rss2 38 | [ 'Fri, 16 Jan 2015 09:33:57 +0100', DateTime.new( 2015, 1,16, 9,33,57,'+1') ], 39 | [ 'Wed, 17 Dec 2014 12:30:48 +0000', DateTime.new( 2014,12,17,12,30,48) ], # from rubymine.rss2 40 | ] 41 | 42 | recs.each do |rec| 43 | d = DateTime.rfc822( rec[0] ) 44 | puts "class: #{d.class.name} - #{d.utc} (#{d.usec}) <= rfc822 #{rec[0]}" 45 | pp d 46 | assert_equal rec[1], d 47 | end 48 | 49 | end # test_rfc822 50 | 51 | 52 | end # class TestDates 53 | -------------------------------------------------------------------------------- /feedparser/test/test_microformats.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_microformats.rb 4 | # or better 5 | # rake test 6 | 7 | 8 | require 'helper' 9 | 10 | 11 | ### 12 | ## note: needs to require microformats gem (it's not required by default) 13 | 14 | require 'microformats' 15 | 16 | 17 | 18 | class TestMicroformats < Minitest::Test 19 | 20 | def test_hentry 21 | 22 | text =< 24 |

Microformats are amazing

25 |

Published by 26 | W. Developer 27 | on 28 | 29 |

In which I extoll the virtues of using microformats.

30 | 31 |
32 |

Blah blah blah

33 |
34 | 35 | HTML 36 | 37 | feed = FeedParser::Parser.parse( text ) 38 | 39 | assert_equal 'html', feed.format 40 | assert_equal 1, feed.items.size 41 | assert_equal 1, feed.items[0].authors.size 42 | assert_equal '

Blah blah blah

', feed.items[0].content_html 43 | assert_equal 'Blah blah blah', feed.items[0].content_text 44 | assert_equal 'Microformats are amazing', feed.items[0].title 45 | assert_equal 'In which I extoll the virtues of using microformats.', feed.items[0].summary 46 | assert_equal DateTime.new( 2013, 6, 13, 12, 0, 0 ).utc, feed.items[0].published 47 | 48 | assert_equal 'W. Developer', feed.items[0].authors[0].name 49 | end 50 | 51 | 52 | end # class TestMicroformats 53 | -------------------------------------------------------------------------------- /feedparser/test/test_rss_live.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_rss_live.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | class TestRssLive < Minitest::Test 10 | 11 | 12 | def test_rubyflow 13 | feed = fetch_and_parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' ) 14 | 15 | assert_equal 'rss 2.0', feed.format 16 | end 17 | 18 | def test_sitepointruby 19 | feed = fetch_and_parse_feed( 'http://www.sitepoint.com/ruby/feed/' ) 20 | 21 | assert_equal 'rss 2.0', feed.format 22 | end 23 | 24 | def test_lambdatheultimate 25 | ## check - has no item.guid - will use item.link for guid 26 | feed = fetch_and_parse_feed( 'http://lambda-the-ultimate.org/rss.xml' ) 27 | 28 | assert_equal 'rss 2.0', feed.format 29 | end 30 | 31 | def test_rubymine 32 | # includes item/content:encoded 33 | feed = fetch_and_parse_feed( 'http://feeds.feedburner.com/jetbrains_rubymine?format=xml' ) 34 | 35 | assert_equal 'rss 2.0', feed.format 36 | end 37 | 38 | end 39 | -------------------------------------------------------------------------------- /feeds/NOTES.md: -------------------------------------------------------------------------------- 1 | # Notes 2 | 3 | 4 | ## Todos 5 | 6 | in news/washingtonpost-blogs-innovations.rss: 7 | 8 | check: 9 | 10 | ``` 11 | assert in ./news/washingtonpost-blogs-innovations.rss:
feed.items[0].title == "Google's AlphaGo beats the world's best Go player - again". 12 | --- expected 13 | +++ actual 14 | @@ -1,2 +1,2 @@ 15 | # encoding: UTF-8 16 | -"Google\u2019s AlphaGo beats the world\u2019s best Go player - again" 17 | +"Google\u2019s AlphaGo beats the world\u2019s best Go player \u2014 again" 18 | 19 | ## feed.items[0].title: Google’s AlphaGo beats the world’s best Go player — again 20 | 21 | => fix reader - do NOT (auto-)convert dashes!!!!!! 22 | ``` 23 | 24 | 25 | add support for multi-line with (preserved) newlines: 26 | 27 | ``` 28 | 30 | Google's AlphaGo beats the world's best Go player -- again 31 | 32 |
33 | AI: 2, Humanity: 0. A computer designed by Google researchers has beaten the world’s top Go player for the second game in a row, capturing the best-of-three match in Wuzhen, China, and confirming AI’s supremacy in what many consider as one of humanity’s most complex boardgames. Ke Jie, a 19-year old Go grandmaster, began the […]]]>
34 | ``` 35 | 36 | 37 | --- 38 | 39 | in news/nytimes.rss 40 | 41 | check 42 | 43 | ``` 44 | ## todo: how to check for empty description - use empty string (or use nil) ??? 45 | ## 46 | >>> pp feed.description 47 | ``` 48 | -------------------------------------------------------------------------------- /feeds/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Tests, Tests, Tests 3 | 4 | Feeds (in Atom, RSS, JSON Feed, HTML h-entry, etc.) with Test Assertions. 5 | 6 | 7 | ## Usage 8 | 9 | How does it work? 10 | 11 | Use the triple-dash (e.g. `---`) on its own line to separate 12 | the feed source from all test assertions. Example: 13 | 14 | 15 | ``` 16 | { 17 | "version": "https://jsonfeed.org/version/1", 18 | "title": "JSON Feed", 19 | "description": "JSON Feed is a pragmatic syndication format for blogs, microblogs, and other time-based content.", 20 | "home_page_url": "https://jsonfeed.org/", 21 | "feed_url": "https://jsonfeed.org/feed.json", 22 | "user_comment": "This feed allows you to read the posts...", 23 | "favicon": "https://jsonfeed.org/graphics/icon.png", 24 | "author": { 25 | "name": "Brent Simmons and Manton Reece" 26 | }, 27 | "items": [ 28 | { 29 | "id": "https://jsonfeed.org/2017/05/17/announcing_json_feed", 30 | "url": "https://jsonfeed.org/2017/05/17/announcing_json_feed", 31 | "title": "Announcing JSON Feed", 32 | "content_html": "

We — Manton Reece and Brent Simmons — have noticed that JSON has become the developers’ choice for APIs,...", 33 | "date_published": "2017-05-17T08:02:12-07:00" 34 | } 35 | ] 36 | } 37 | 38 | --- 39 | 40 | feed.format: json 41 | feed.title: JSON Feed 42 | feed.url: https://jsonfeed.org/ 43 | feed.feed_url: https://jsonfeed.org/feed.json 44 | feed.summary: JSON Feed is a pragmatic syndication format for blogs, microblogs, and other time-based content. 45 | 46 | feed.authors[0].name: Brent Simmons and Manton Reece 47 | 48 | feed.items[0].title: Announcing JSON Feed 49 | feed.items[0].url: https://jsonfeed.org/2017/05/17/announcing_json_feed 50 | feed.items[0].id: https://jsonfeed.org/2017/05/17/announcing_json_feed 51 | feed.items[0].published_local: >>> DateTime.new( 2017, 5, 17, 8, 2, 12, '-7' ) 52 | feed.items[0].published: >>> DateTime.new( 2017, 5, 17, 8, 2, 12, '-7' ).utc 53 | ``` 54 | 55 | 56 | ## Run Tests 57 | 58 | Use 59 | 60 | ``` 61 | ruby -I ./test test/test_feeds.rb 62 | ``` 63 | 64 | to run selected / individual test or to run 65 | all tests 66 | 67 | ``` 68 | rake # or 69 | rake test 70 | ``` 71 | 72 | 73 | Resulting in: 74 | 75 | ``` 76 | reading ./spec/rss/creator.rss ... 77 | [debug] using stdlib rss/0.2.7 78 | [debug] Parsing feed in xml... 79 | [debug] feed.class=RSS::Rss 80 | [debug] rss | feed.version >2.0< 81 | [debug] rss | feed.title >Test Dublin Core< : String 82 | [debug] rss | feed.description => summary >< : String 83 | [debug] rss | feed.lastBuildDate => updated >Mon, 29 May 2017 20:51:30 +0200< : Time 84 | [debug] rss | feed.pubDate => published >< : NilClass 85 | eval assert_equal %{Peter Baker}, feed.items[0].authors[0].to_s 86 | eval assert_equal %{Peter Baker}, feed.items[0].author.text 87 | eval assert_equal %{Peter Baker}, feed.items[0].author.to_s 88 | eval assert_equal nil, feed.items[0].author.email 89 | ... 90 | 91 | Finished in 5.104933s, 0.1959 runs/s, 79.1391 assertions/s. 
92 | 93 | 1 runs, 404 assertions, 0 failures, 0 errors, 0 skips 94 | ``` 95 | -------------------------------------------------------------------------------- /feeds/Rakefile: -------------------------------------------------------------------------------- 1 | ### 2 | ## use TestTask from Rake for setting up testing 3 | ## see https://docs.ruby-lang.org/en/2.1.0/Rake/TestTask.html 4 | 5 | require 'rake/testtask' 6 | 7 | Rake::TestTask.new do |t| 8 | t.test_files = FileList['test/**/test_*.rb'] 9 | t.libs = ['test'] ## (auto-)add to load path 10 | end 11 | 12 | 13 | task :default => :test 14 | -------------------------------------------------------------------------------- /feeds/books/pragprog.rss: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | Pragmatic Bookshelf 9 | https://pragprog.com/ 10 | Up-to-date information about the Pragmatic Bookshelf 11 | en-us 12 | Sat, 27 May 2017 17:18:55 +0000 13 | 14 | Python Testing with pytest 15 | <div id="cms-content"> 16 | 17 | </div> 18 | 19 | Tue, 23 May 2017 18:35:00 +0000 20 | http://pragprog.com/news/python-testing-with-pytest?3639225 21 | http://pragprog.com/news/python-testing-with-pytest 22 | news 23 | 24 | 25 | Agile Web Development with Rails 5.1, Upgrade Details 26 | <div id="cms-content"> 27 | 28 | </div> 29 | 30 | Wed, 10 May 2017 14:32:55 +0000 31 | http://pragprog.com/news/agile-web-development-with-rails-5-1-upgrade-details?3629250 32 | http://pragprog.com/news/agile-web-development-with-rails-5-1-upgrade-details 33 | news 34 | 35 | 36 | Surprise Sale: 40% Off this week only 37 | <div id="cms-content"> 38 | 39 | </div> 40 | 41 | Wed, 26 Apr 2017 13:20:02 +0000 42 | http://pragprog.com/news/surprise-sale-40-off-this-week-only?3619134 43 | http://pragprog.com/news/surprise-sale-40-off-this-week-only 44 | news 45 | 46 | 47 | Now in print: iOS 10 SDK Development: Creating iPhone and iPad Apps with Swift 48 | <div id="cms-content"> 49 | 50 | </div> 51 | 52 | 
Tue, 28 Mar 2017 13:54:09 +0000 53 | http://pragprog.com/news/now-in-print-ios-10-sdk-development-creating-iphone-and-ipad-apps-with-swift?3607075 54 | http://pragprog.com/news/now-in-print-ios-10-sdk-development-creating-iphone-and-ipad-apps-with-swift 55 | news 56 | 57 | 58 | Functional Web Development with Elixir, OTP, and Phoenix 59 | <div id="cms-content"> 60 | 61 | </div> 62 | 63 | Tue, 21 Mar 2017 14:48:25 +0000 64 | http://pragprog.com/news/functional-web-development-with-elixir-otp-and-phoenix?3602607 65 | http://pragprog.com/news/functional-web-development-with-elixir-otp-and-phoenix 66 | news 67 | 68 | 69 | A Common-Sense Guide to Data Structures and Algorithms 70 | <div id="cms-content"> 71 | 72 | </div> 73 | 74 | Tue, 14 Mar 2017 17:32:21 +0000 75 | http://pragprog.com/news/a-common-sense-guide-to-data-structures-and-algorithms?3600282 76 | http://pragprog.com/news/a-common-sense-guide-to-data-structures-and-algorithms 77 | news 78 | 79 | 80 | Design It! From Programmer to Software Architect 81 | <div id="cms-content"> 82 | 83 | </div> 84 | 85 | Wed, 08 Mar 2017 14:30:18 +0000 86 | http://pragprog.com/news/design-it-from-programmer-to-software-architect?3597202 87 | http://pragprog.com/news/design-it-from-programmer-to-software-architect 88 | news 89 | 90 | 91 | Programming Elixir 1.3 in print 92 | <div id="cms-content"> 93 | 94 | </div> 95 | 96 | Sun, 19 Feb 2017 14:35:41 +0000 97 | http://pragprog.com/news/programming-elixir-1-3-in-print?3593675 98 | http://pragprog.com/news/programming-elixir-1-3-in-print 99 | news 100 | 101 | 102 | The Cucumber Book, Second Edition for Rails 5 103 | <div id="cms-content"> 104 | 105 | </div> 106 | 107 | Sun, 19 Feb 2017 14:31:11 +0000 108 | http://pragprog.com/news/the-cucumber-book-second-edition-for-rails-5?3593671 109 | http://pragprog.com/news/the-cucumber-book-second-edition-for-rails-5 110 | news 111 | 112 | 113 | Practical Vim for Vim 8 and Core Data for Swift 3 114 | <div id="cms-content"> 115 | 116 | </div> 
117 | 118 | Sun, 19 Feb 2017 14:29:19 +0000 119 | http://pragprog.com/news/practical-vim-for-vim-8-and-core-data-for-swift-3?3593667 120 | http://pragprog.com/news/practical-vim-for-vim-8-and-core-data-for-swift-3 121 | news 122 | 123 | 124 | 125 | 126 | --- 127 | 128 | feed.format: rss 2.0 129 | feed.title: Pragmatic Bookshelf 130 | feed.url: https://pragprog.com/ 131 | feed.description: Up-to-date information about the Pragmatic Bookshelf 132 | feed.published: >>> DateTime.new( 2017, 5, 27, 17, 18, 55 ) 133 | 134 | 135 | feed.items.size: >>> 10 136 | 137 | feed.items[0].title: Python Testing with pytest 138 | feed.items[0].url: http://pragprog.com/news/python-testing-with-pytest 139 | feed.items[0].guid: http://pragprog.com/news/python-testing-with-pytest?3639225 140 | feed.items[0].published: >>> DateTime.new( 2017, 5, 23, 18, 35 ) 141 | 142 | feed.items[1].title: Agile Web Development with Rails 5.1, Upgrade Details 143 | feed.items[1].url: http://pragprog.com/news/agile-web-development-with-rails-5-1-upgrade-details 144 | feed.items[1].guid: http://pragprog.com/news/agile-web-development-with-rails-5-1-upgrade-details?3629250 145 | feed.items[1].published: >>> DateTime.new( 2017, 5, 10, 14, 32, 55 ) 146 | 147 | 148 | >>> pp feed.items[0].description 149 | -------------------------------------------------------------------------------- /feeds/comics/xkcd.atom: -------------------------------------------------------------------------------- 1 | 2 | 3 | xkcd.com 4 | 5 | https://xkcd.com/ 6 | 2017-05-22T00:00:00Z 7 | 8 | Genetic Testing Results 9 | 10 | 2017-05-22T00:00:00Z 11 | https://xkcd.com/1840/ 12 |

<img src="https://imgs.xkcd.com/comics/genetic_testing_results.png" title="That's very exciting! The bad news is that it's a risk factor for a lot of things." alt="That's very exciting! The bad news is that it's a risk factor for a lot of things." /> 13 | 14 | 15 | Doctor Visit 16 | 17 | 2017-05-19T00:00:00Z 18 | https://xkcd.com/1839/ 19 | <img src="https://imgs.xkcd.com/comics/doctor_visit.png" title="According to these blood tests, you're like 30% cereal." alt="According to these blood tests, you're like 30% cereal." /> 20 | 21 | 22 | Machine Learning 23 | 24 | 2017-05-17T00:00:00Z 25 | https://xkcd.com/1838/ 26 | <img src="https://imgs.xkcd.com/comics/machine_learning.png" title="The pile gets soaked with data and starts to get mushy over time, so it's technically recurrent." alt="The pile gets soaked with data and starts to get mushy over time, so it's technically recurrent." /> 27 | 28 | 29 | Rental Car 30 | 31 | 2017-05-15T00:00:00Z 32 | https://xkcd.com/1837/ 33 | <img src="https://imgs.xkcd.com/comics/rental_car.png" title="Technically, both cars are haunted, but the murder ghosts can't stand listening to the broken GPS for more than a few minutes." alt="Technically, both cars are haunted, but the murder ghosts can't stand listening to the broken GPS for more than a few minutes." /> 34 | 35 |
36 | 37 | --- 38 | 39 | feed.format: atom 40 | feed.title: xkcd.com 41 | feed.url: https://xkcd.com/ 42 | feed.updated: >>> DateTime.new( 2017, 5, 22 ) 43 | feed.items.size: >>> 4 44 | 45 | feed.items[0].title: Genetic Testing Results 46 | feed.items[0].url: https://xkcd.com/1840/ 47 | feed.items[0].guid: https://xkcd.com/1840/ 48 | feed.items[0].updated: >>> DateTime.new( 2017, 5, 22 ) 49 | -------------------------------------------------------------------------------- /feeds/comics/xkcd.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | xkcd.com 5 | https://xkcd.com/ 6 | xkcd.com: A webcomic of romance and math humor. 7 | en 8 | 9 | Genetic Testing Results 10 | https://xkcd.com/1840/ 11 | <img src="https://imgs.xkcd.com/comics/genetic_testing_results.png" title="That's very exciting! The bad news is that it's a risk factor for a lot of things." alt="That's very exciting! The bad news is that it's a risk factor for a lot of things." /> 12 | Mon, 22 May 2017 04:00:00 -0000 13 | https://xkcd.com/1840/ 14 | 15 | 16 | Doctor Visit 17 | https://xkcd.com/1839/ 18 | <img src="https://imgs.xkcd.com/comics/doctor_visit.png" title="According to these blood tests, you're like 30% cereal." alt="According to these blood tests, you're like 30% cereal." /> 19 | Fri, 19 May 2017 04:00:00 -0000 20 | https://xkcd.com/1839/ 21 | 22 | 23 | Machine Learning 24 | https://xkcd.com/1838/ 25 | <img src="https://imgs.xkcd.com/comics/machine_learning.png" title="The pile gets soaked with data and starts to get mushy over time, so it's technically recurrent." alt="The pile gets soaked with data and starts to get mushy over time, so it's technically recurrent." 
/> 26 | Wed, 17 May 2017 04:00:00 -0000 27 | https://xkcd.com/1838/ 28 | 29 | 30 | Rental Car 31 | https://xkcd.com/1837/ 32 | <img src="https://imgs.xkcd.com/comics/rental_car.png" title="Technically, both cars are haunted, but the murder ghosts can't stand listening to the broken GPS for more than a few minutes." alt="Technically, both cars are haunted, but the murder ghosts can't stand listening to the broken GPS for more than a few minutes." /> 33 | Mon, 15 May 2017 04:00:00 -0000 34 | https://xkcd.com/1837/ 35 | 36 | 37 | 38 | 39 | --- 40 | 41 | feed.format: rss 2.0 42 | feed.title: xkcd.com 43 | feed.description: xkcd.com: A webcomic of romance and math humor. 44 | feed.url: https://xkcd.com/ 45 | feed.items.size: >>> 4 46 | 47 | feed.items[0].title: Genetic Testing Results 48 | feed.items[0].url: https://xkcd.com/1840/ 49 | feed.items[0].guid: https://xkcd.com/1840/ 50 | feed.items[0].published: >>> DateTime.new( 2017, 5, 22, 4, 0 ) 51 | feed.items[0].description: That's very exciting! The bad news is that it's a risk factor for a lot of things. 52 | 53 | 54 | feed.items[1].title: Doctor Visit 55 | feed.items[1].url: https://xkcd.com/1839/ 56 | feed.items[1].guid: https://xkcd.com/1839/ 57 | feed.items[1].published: >>> DateTime.new( 2017, 5, 19, 4, 0 ) 58 | 59 | >>> pp feed.items[0].description 60 | -------------------------------------------------------------------------------- /feeds/misc/googlegroups.atom: -------------------------------------------------------------------------------- 1 | 2 | https://groups.google.com/d/forum/beerdb 3 | Open Beer & Brewery Database (beer.db) 4 | Free open public domain beer database &amp; schema (beer.db) for use in any (programming) language (e.g. uses plain text fixtures/data sets). Questions? Comments? 5 | 6 | 7 | Google Groups 8 | 9 | 10 | Joe Sixpack 11 | 12 | 2014-12-17T11:54:43Z 13 | https://groups.google.com/d/topic/beerdb/KpQOUDYJ3J8 14 | 15 | Planet Beer (Austria, Belgium) - Feeds Incl. 
Craft Fest Wien, Beer-A-Day, proBier n Friends 16 | Hello, I've started putting together a planet site for beer, that is, Planet Beer [1]. The first feed lists include: - Austria [2] - Belgium [3] You're welcome and invited to suggest new countries and feeds. Cheers. Prost. [1] http://planetbeer.herokuapp.com [2] http://github.com/openbeer/planet/blob/master/ 17 | 18 | 19 | 20 | --- 21 | 22 | feed.format: atom 23 | feed.title: Open Beer & Brewery Database (beer.db) 24 | feed.url: https://groups.google.com/d/forum/beerdb 25 | 26 | feed.generator.name: Google Groups 27 | 28 | feed.items[0].title: Planet Beer (Austria, Belgium) - Feeds Incl. Craft Fest Wien, Beer-A-Day, proBier n Friends 29 | feed.items[0].url: https://groups.google.com/d/topic/beerdb/KpQOUDYJ3J8 30 | 31 | 32 | ### todo: fix: &amp; => & -> always assume plain text? (by default) - auto-escape xml entities?? 33 | feed.summary: Free open public domain beer database & schema (beer.db) for use in any (programming) language (e.g. uses plain text fixtures/data sets). Questions? Comments? 34 | 35 | ### todo: add check for datetime (use to_s ??) 36 | ## feed.updated.to_s: 2014-12-31T15:33:00+00:00 37 | ## feed.items[0].to_s: 2014-12-31T15:33:00+00:00 38 | -------------------------------------------------------------------------------- /feeds/misc/googlegroups2.atom: -------------------------------------------------------------------------------- 1 | 2 | https://groups.google.com/d/forum/beerdb 3 | Open Beer & Brewery Database (beer.db) 4 | Free open public domain beer database &amp; schema (beer.db) for use in any (programming) language (e.g. uses plain text fixtures/data sets). Questions? Comments? 5 | 6 | 7 | 8 | Google Groups (w/ leading n trailing newlines stripped) 9 | 10 | 11 | 12 | Joe Sixpack 13 | 14 | 2014-12-17T11:54:43Z 15 | https://groups.google.com/d/topic/beerdb/KpQOUDYJ3J8 16 | 17 | Planet Beer (Austria, Belgium) - Feeds Incl. 
Craft Fest Wien, Beer-A-Day, proBier n Friends 18 | Hello, I've started putting together a planet site for beer, that is, Planet Beer [1]. The first feed lists include: - Austria [2] - Belgium [3] You're welcome and invited to suggest new countries and feeds. Cheers. Prost. [1] http://planetbeer.herokuapp.com [2] http://github.com/openbeer/planet/blob/master/ 19 | 20 | 21 | 22 | --- 23 | 24 | feed.format: atom 25 | feed.url: https://groups.google.com/d/forum/beerdb 26 | 27 | feed.generator.name: Google Groups (w/ leading n trailing newlines stripped) 28 | -------------------------------------------------------------------------------- /feeds/misc/jsonfeed.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "https://jsonfeed.org/version/1", 3 | "title": "JSON Feed", 4 | "description": "JSON Feed is a pragmatic syndication format for blogs, microblogs, and other time-based content.", 5 | "home_page_url": "https://jsonfeed.org/", 6 | "feed_url": "https://jsonfeed.org/feed.json", 7 | "user_comment": "This feed allows you to read the posts from this site in any feed reader that supports the JSON Feed format. To add this feed to your reader, copy the following URL — https://jsonfeed.org/feed.json — and add it your reader.", 8 | "favicon": "https://jsonfeed.org/graphics/icon.png", 9 | "author": { 10 | "name": "Brent Simmons and Manton Reece" 11 | }, 12 | "items": [ 13 | { 14 | "id": "https://jsonfeed.org/2017/05/17/announcing_json_feed", 15 | "url": "https://jsonfeed.org/2017/05/17/announcing_json_feed", 16 | "title": "Announcing JSON Feed", 17 | "content_html": "

We — Manton Reece and Brent Simmons — have noticed that JSON has become the developers’ choice for APIs, and that developers will often go out of their way to avoid XML. JSON is simpler to read and write, and it’s less prone to bugs.

\n\n

So we developed JSON Feed, a format similar to RSS and Atom but in JSON. It reflects the lessons learned from our years of work reading and publishing feeds.

\n\n

See the spec. It’s at version 1, which may be the only version ever needed. If future versions are needed, version 1 feeds will still be valid feeds.

\n\n

Notes

\n\n

We have a WordPress plugin and, coming soon, a JSON Feed Parser for Swift. As more code is written, by us and others, we’ll update the code page.

\n\n

See Mapping RSS and Atom to JSON Feed for more on the similarities between the formats.

\n\n

This website — the Markdown files and supporting resources — is up on GitHub, and you’re welcome to comment there.

\n\n

This website is also a blog, and you can subscribe to the RSS feed or the JSON feed (if your reader supports it).

\n\n

We worked with a number of people on this over the course of several months. We list them, and thank them, at the bottom of the spec. But — most importantly — Craig Hockenberry spent a little time making it look pretty. :)

", 18 | "date_published": "2017-05-17T08:02:12-07:00" 19 | } 20 | ] 21 | } 22 | 23 | --- 24 | 25 | feed.format: json 26 | feed.title: JSON Feed 27 | feed.url: https://jsonfeed.org/ 28 | feed.feed_url: https://jsonfeed.org/feed.json 29 | feed.summary: JSON Feed is a pragmatic syndication format for blogs, microblogs, and other time-based content. 30 | 31 | feed.authors[0].name: Brent Simmons and Manton Reece 32 | 33 | 34 | feed.items[0].title: Announcing JSON Feed 35 | feed.items[0].url: https://jsonfeed.org/2017/05/17/announcing_json_feed 36 | feed.items[0].id: https://jsonfeed.org/2017/05/17/announcing_json_feed 37 | feed.items[0].published: >>> DateTime.new( 2017, 5, 17, 8, 2, 12, '-7' ).utc 38 | -------------------------------------------------------------------------------- /feeds/news/nytimes-paul-krugman.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 60 4 | Paul Krugman 5 | https://www.nytimes.com/column/paul-krugman 6 | Paul Krugman, a New York Times Op-Ed columnist, writes about macroeconomics, trade, health care, social policy and politics. In 2008, he received the Nobel Prize in Economics. 7 | Copyright 2017 The New York Times Company 8 | en-us 9 | 10 | Trump’s Energy, Low and Dirty 11 | https://www.nytimes.com/2017/05/29/opinion/trump-g-7-summit-energy.html 12 | Risking the planet to keep a lie alive. 13 | By PAUL KRUGMAN 14 | Mon, 29 May 2017 03:21:09 -0500 15 | 16 | 17 | It’s All About Trump’s Contempt 18 | https://www.nytimes.com/2017/05/26/opinion/trumpcare-cbo-federal-budget.html 19 | His budget and health plan show he despises his voters. Will they notice? 20 | By PAUL KRUGMAN 21 | Fri, 26 May 2017 03:21:23 -0500 22 | 23 | 24 | The Unfreeing of American Workers 25 | https://www.nytimes.com/2017/05/22/opinion/american-workers-noncompete-agreements.html 26 | Creeping along the real road to serfdom. 
27 | By PAUL KRUGMAN 28 | Mon, 22 May 2017 03:21:10 -0500 29 | 30 | 31 | What’s the Matter With Republicans? 32 | https://www.nytimes.com/2017/05/19/opinion/whats-the-matter-with-republicans.html 33 | We need to understand what made Trump possible. 34 | By PAUL KRUGMAN 35 | Fri, 19 May 2017 03:21:08 -0500 36 | 37 | 38 | The Priming of Mr. Donald Trump 39 | https://www.nytimes.com/2017/05/15/opinion/trump-tax-cuts-deficit.html 40 | He’s not the only one with fiscal fantasies. 41 | By PAUL KRUGMAN 42 | Mon, 15 May 2017 03:21:15 -0500 43 | 44 | 45 | Judas, Tax Cuts and the Great Betrayal 46 | https://www.nytimes.com/2017/05/12/opinion/judas-tax-cuts-and-the-great-betrayal.html 47 | The Republican response to Trump’s cover-up will live in infamy forever. 48 | By PAUL KRUGMAN 49 | Fri, 12 May 2017 03:22:04 -0500 50 | 51 | 52 | Republicans Party Like It’s 1984https://www.nytimes.com/2017/05/08/opinion/republicans-party-like-its-1984.htmlMaking policy by lying about everything.By PAUL KRUGMANMon, 08 May 2017 03:21:24 -0500 53 | 54 | What’s the Matter With Europe?https://www.nytimes.com/2017/05/05/opinion/european-union-france-election.htmlLe Pen must be beaten, but then what?By PAUL KRUGMANFri, 05 May 2017 03:21:24 -0500 55 | 56 | On the Power of Being Awfulhttps://www.nytimes.com/2017/05/01/opinion/donald-trump-on-the-power-of-being-awful.htmlTrump supporters will never admit they were wrong.By PAUL KRUGMANMon, 01 May 2017 03:21:16 -0500 57 | 58 | Living in the Trump Zonehttps://www.nytimes.com/2017/04/28/opinion/living-in-the-trump-zone.htmlWe’re in a place and time where childish petulance drives policy.By PAUL KRUGMANFri, 28 Apr 2017 03:21:22 -0500 59 | 60 | 61 | 62 | --- 63 | 64 | feed.format: rss 2.0 65 | feed.title: Paul Krugman 66 | feed.url: https://www.nytimes.com/column/paul-krugman 67 | feed.description: Paul Krugman, a New York Times Op-Ed columnist, writes about macroeconomics, trade, health care, social policy and politics. 
In 2008, he received the Nobel Prize in Economics. 68 | 69 | feed.items[0].title: Trump’s Energy, Low and Dirty 70 | feed.items[0].url: https://www.nytimes.com/2017/05/29/opinion/trump-g-7-summit-energy.html 71 | feed.items[0].description: Risking the planet to keep a lie alive. 72 | feed.items[0].author.text: By PAUL KRUGMAN 73 | feed.items[0].published: >>> DateTime.new( 2017, 5, 29, 3, 21, 9, '-5').utc 74 | 75 | feed.items[1].title: It’s All About Trump’s Contempt 76 | feed.items[1].url: https://www.nytimes.com/2017/05/26/opinion/trumpcare-cbo-federal-budget.html 77 | feed.items[1].description: His budget and health plan show he despises his voters. Will they notice? 78 | feed.items[1].author.text: By PAUL KRUGMAN 79 | feed.items[1].published: >>> DateTime.new( 2017, 5, 26, 3, 21,23, '-5').utc 80 | -------------------------------------------------------------------------------- /feeds/news/nytimes-thomas-l-friedman.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 60 4 | Thomas L. Friedman 5 | https://www.nytimes.com/column/thomas-l-friedman 6 | Thomas L. Friedman, a New York Times Op-Ed columnist, writes about foreign affairs, globalization and technology. 7 | Copyright 2017 The New York Times Company 8 | en-us 9 | 10 | A Road Trip Through Rusting and Rising America 11 | https://www.nytimes.com/2017/05/24/opinion/rusting-and-rising-america.html 12 | The comeback of distressed and lost communities is the story of Bill Clinton’s America, not Donald Trump’s. 13 | By THOMAS L. FRIEDMAN 14 | Wed, 24 May 2017 03:21:08 -0500 15 | 16 | 17 | It’s Chicken or Fish 18 | https://www.nytimes.com/2017/05/16/opinion/trump-republicans-2018-elections.html 19 | Elected Republicans won’t stand up to Trump’s abuse of power, like his asking James Comey to halt the Flynn-Russia inquiry. So now you have a choice. 20 | By THOMAS L. 
FRIEDMAN 21 | Tue, 16 May 2017 19:41:47 -0500 22 | 23 | 24 | Owning Your Own Future 25 | https://www.nytimes.com/2017/05/10/opinion/owning-your-own-future.html 26 | Stop learning and you could find yourself without a job. 27 | By THOMAS L. FRIEDMAN 28 | Wed, 10 May 2017 03:21:01 -0500 29 | 30 | 31 | Trump: Crazy Like a Fox, or Just Crazy? 32 | https://www.nytimes.com/2017/05/03/opinion/trump-crazy-like-a-fox-or-just-crazy.html 33 | The president’s remarks about his first 100 days have been simply bizarre. 34 | By THOMAS L. FRIEDMAN 35 | Wed, 03 May 2017 03:21:08 -0500 36 | 37 | 38 | On a Par 5 in Dubai, Good Humor and a Respite From All Things Trump 39 | https://www.nytimes.com/2017/04/26/opinion/on-a-par-5-in-dubai-good-humor-and-a-respite-from-all-things-trump.html 40 | Oh, and there’s this yogi with a flowing white beard and golf clothes. 41 | By THOMAS L. FRIEDMAN 42 | Wed, 26 Apr 2017 03:21:15 -0500 43 | 44 | 45 | Coal Museum Sees the Future; Trump Doesn’t 46 | https://www.nytimes.com/2017/04/19/opinion/coal-museum-sees-the-future-trump-doesnt.html 47 | The president is focused on prolonging a dying industry rather than on renewable energy, which is making the country stronger without him. 48 | By THOMAS L. FRIEDMAN 49 | Wed, 19 Apr 2017 03:21:23 -0500 50 | 51 | 52 | Why Is Trump Fighting ISIS in Syria? 53 | https://www.nytimes.com/2017/04/12/opinion/why-is-trump-fighting-isis-in-syria.html 54 | A strategy built on territory won’t ultimately eliminate the terrorist group. 55 | By THOMAS L. FRIEDMAN 56 | Wed, 12 Apr 2017 03:21:13 -0500 57 | 58 | 59 | President Trump’s Real-World Syria Lesson 60 | https://www.nytimes.com/2017/04/05/opinion/president-trumps-real-world-syria-lesson.html 61 | Doing nothing shouldn’t be an option. 62 | By THOMAS L. 
FRIEDMAN 63 | Wed, 05 Apr 2017 03:21:22 -0500 64 | 65 | 66 | Trump Is a Chinese Agent 67 | https://www.nytimes.com/2017/03/29/opinion/trump-is-a-chinese-agent.html 68 | Ignoring climate change and the benefits of clean energy only helps a rival. 69 | By THOMAS L. FRIEDMAN 70 | Wed, 29 Mar 2017 03:21:26 -0500 71 | 72 | 73 | Calling On a Few Good Men 74 | https://www.nytimes.com/2017/03/22/opinion/calling-on-a-few-good-men.html 75 | An open letter to the adults in the Trump administration with the most integrity. 76 | By THOMAS L. FRIEDMAN 77 | Wed, 22 Mar 2017 03:21:26 -0500 78 | 79 | 80 | 81 | 82 | --- 83 | 84 | feed.format: rss 2.0 85 | feed.title: Thomas L. Friedman 86 | feed.url: https://www.nytimes.com/column/thomas-l-friedman 87 | feed.description: Thomas L. Friedman, a New York Times Op-Ed columnist, writes about foreign affairs, globalization and technology. 88 | 89 | feed.items[0].title: A Road Trip Through Rusting and Rising America 90 | feed.items[0].url: https://www.nytimes.com/2017/05/24/opinion/rusting-and-rising-america.html 91 | feed.items[0].description: [[ 92 | The comeback of distressed and lost communities is the story of Bill Clinton’s America, not Donald Trump’s. ]] 93 | feed.items[0].author.text: By THOMAS L. FRIEDMAN 94 | feed.items[0].published: >>> DateTime.new( 2017, 5, 24, 3, 21, 8, '-5' ).utc 95 | 96 | feed.items[1].title: It’s Chicken or Fish 97 | feed.items[1].url: https://www.nytimes.com/2017/05/16/opinion/trump-republicans-2018-elections.html 98 | feed.items[1].description: [[ 99 | Elected Republicans won’t stand up to Trump’s abuse of power, 100 | like his asking James Comey to halt the Flynn-Russia inquiry. So now you have a choice. ]] 101 | feed.items[1].author.text: By THOMAS L. 
FRIEDMAN 102 | feed.items[1].published: >>> DateTime.new( 2017, 5, 16, 19, 41, 47, '-5' ).utc 103 | 104 | feed.items[2].title: Owning Your Own Future 105 | feed.items[2].url: https://www.nytimes.com/2017/05/10/opinion/owning-your-own-future.html 106 | feed.items[2].description: Stop learning and you could find yourself without a job. 107 | -------------------------------------------------------------------------------- /feeds/spec/atom/author.atom: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test Author 4 | 5 | 6 | http://example.com 7 | 2017-05-23T19:36:44Z 8 | 9 | John Jay Gruber 10 | http://example.com/ 11 | 12 | 13 | 14 | Feedbin, Too 15 | 16 | tag:daringfireball.net,2017:/linked//6.33732 17 | 2017-05-23T18:50:36Z 18 | 2017-05-23T19:03:33Z 19 | 20 | Max Gruber Jun. 21 | http://example.com/ 22 | 23 | 24 | Ben Ubois, announcing support for JSON Feed in Feedbin... 25 | 26 | 27 | 28 | 29 | --- 30 | 31 | feed.authors.size: >>> 1 32 | feed.authors[0].name: John Jay Gruber 33 | feed.authors[0].url: http://example.com/ 34 | feed.authors[0].email: >>> nil 35 | 36 | feed.author.name: John Jay Gruber 37 | feed.author.url: http://example.com/ 38 | feed.author.email: >>> nil 39 | 40 | 41 | feed.items[0].authors.size: >>> 1 42 | feed.items[0].authors[0].name: Max Gruber Jun. 43 | feed.items[0].authors[0].url: http://example.com/ 44 | feed.items[0].authors[0].email: >>> nil 45 | 46 | feed.items[0].author.name: Max Gruber Jun. 
47 | feed.items[0].author.url: http://example.com/ 48 | feed.items[0].author.email: >>> nil 49 | -------------------------------------------------------------------------------- /feeds/spec/atom/authors.atom: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test Author 4 | 5 | 6 | http://example.com 7 | 2017-05-23T19:36:44Z 8 | 9 | John Jay Gruber 10 | http://example.com/ 11 | 12 | 13 | Frank Huber 14 | http://example.com/frank/ 15 | 16 | 17 | 18 | Feedbin, Too 19 | 20 | tag:daringfireball.net,2017:/linked//6.33732 21 | 2017-05-23T18:50:36Z 22 | 2017-05-23T19:03:33Z 23 | 24 | Max Gruber Jun. 25 | http://example.com/ 26 | 27 | 28 | Frank Huber 29 | http://example.com/frank/ 30 | frank@example.com 31 | 32 | 33 | George Imker 34 | http://example.com/george/ 35 | 36 | 37 | Ben Ubois, announcing support for JSON Feed in Feedbin... 38 | 39 | 40 | 41 | 42 | --- 43 | 44 | feed.authors.size: >>> 2 45 | feed.authors[0].name: John Jay Gruber 46 | feed.authors[0].url: http://example.com/ 47 | feed.authors[0].email: >>> nil 48 | feed.authors[1].name: Frank Huber 49 | feed.authors[1].url: http://example.com/frank/ 50 | 51 | feed.author.name: John Jay Gruber 52 | feed.author.url: http://example.com/ 53 | feed.author.email: >>> nil 54 | 55 | 56 | 57 | feed.items[0].authors.size: >>> 3 58 | feed.items[0].authors[0].name: Max Gruber Jun. 59 | feed.items[0].authors[0].url: http://example.com/ 60 | feed.items[0].authors[0].email: >>> nil 61 | feed.items[0].authors[1].name: Frank Huber 62 | feed.items[0].authors[1].url: http://example.com/frank/ 63 | feed.items[0].authors[1].email: frank@example.com 64 | feed.items[0].authors[2].name: George Imker 65 | feed.items[0].authors[2].url: http://example.com/george/ 66 | feed.items[0].authors[2].email: >>> nil 67 | 68 | feed.items[0].author.name: Max Gruber Jun. 
69 | feed.items[0].author.url: http://example.com/ 70 | feed.items[0].author.email: >>> nil 71 | -------------------------------------------------------------------------------- /feeds/spec/atom/categories.atom: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test Categories 4 | 5 | 6 | http://example.com 7 | 2017-05-23T19:36:44Z 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | Rock Surprise 25 | 26 | https://www.tbray.org/ongoing/When/201x/2017/05/20/Rock-Surprise 27 | 2017-05-20T12:00:00-07:00 28 | 2017-05-21T11:13:00-07:00 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | On a recent Saturday we accidentally took in two very 38 | different pop-music concerts... 39 | 40 | 41 | 42 | 43 | --- 44 | 45 | feed.tags.size: >>> 12 46 | feed.tags[0].name: jruby 47 | feed.tags[1].name: ruby 48 | feed.tags[2].name: java 49 | feed.tags[3].name: rails 50 | feed.tags[4].name: invokedynamic 51 | feed.tags[5].name: jvm 52 | feed.tags[6].name: application bundle 53 | feed.tags[7].name: compilation 54 | feed.tags[8].name: compiler 55 | feed.tags[9].name: dynamic dispatch 56 | feed.tags[10].name: dynamic languages 57 | feed.tags[11].name: ruby 2.0 58 | 59 | 60 | feed.items[0].tags.size: >>> 5 61 | feed.items[0].tags[0].name: Arts/Music 62 | feed.items[0].tags[0].scheme: https://www.tbray.org/ongoing/What/ 63 | feed.items[0].tags[1].name: Arts 64 | feed.items[0].tags[2].name: Music 65 | feed.items[0].tags[3].name: Arts/Photos 66 | feed.items[0].tags[4].name: Photos 67 | -------------------------------------------------------------------------------- /feeds/spec/json/example.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "https://jsonfeed.org/version/1", 3 | "title": "My Example Feed", 4 | "home_page_url": "https://example.org/", 5 | "feed_url": "https://example.org/feed.json", 6 | "items": [ 7 | { 8 | "id": "2", 9 | "content_text": "This is a second 
item.", 10 | "url": "https://example.org/second-item" 11 | }, 12 | { 13 | "id": "1", 14 | "content_html": "

Hello, world!

", 15 | "url": "https://example.org/initial-post" 16 | } 17 | ] 18 | } 19 | 20 | --- 21 | 22 | feed.format: json 23 | feed.title: My Example Feed 24 | feed.url: https://example.org/ 25 | feed.feed_url: https://example.org/feed.json 26 | 27 | feed.items.size: >>> 2 28 | 29 | feed.items[0].id: 2 30 | feed.items[0].content_text: This is a second item. 31 | feed.items[0].url: https://example.org/second-item 32 | 33 | 34 | feed.items[1].id: 1 35 | feed.items[1].content_html:

Hello, world!

36 | feed.items[1].url: https://example.org/initial-post 37 | -------------------------------------------------------------------------------- /feeds/spec/json/microblog.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "https://jsonfeed.org/version/1", 3 | "user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json", 4 | "title": "Brent Simmons’s Microblog", 5 | "home_page_url": "https://example.org/", 6 | "feed_url": "https://example.org/feed.json", 7 | "author": { 8 | "name": "Brent Simmons", 9 | "url": "http://example.org/", 10 | "avatar": "https://example.org/avatar.png" 11 | }, 12 | "items": [ 13 | { 14 | "id": "2347259", 15 | "url": "https://example.org/2347259", 16 | "content_text": "Cats are neat. \n\nhttps://example.org/cats", 17 | "date_published": "2016-02-09T14:22:00-07:00" 18 | } 19 | ] 20 | } 21 | 22 | --- 23 | 24 | feed.format: json 25 | feed.title: Brent Simmons’s Microblog 26 | feed.url: https://example.org/ 27 | feed.feed_url: https://example.org/feed.json 28 | 29 | feed.authors.size: >>> 1 30 | feed.authors[0].name: Brent Simmons 31 | feed.authors[0].url: http://example.org/ 32 | feed.authors[0].avatar: https://example.org/avatar.png 33 | 34 | 35 | feed.items.size: >>> 1 36 | feed.items[0].id: 2347259 37 | feed.items[0].url: https://example.org/2347259 38 | feed.items[0].content_text: Cats are neat. 
\n\nhttps://example.org/cats 39 | feed.items[0].published: >>> DateTime.new( 2016, 2, 9, 14, 22, 0, '-7').utc 40 | feed.items[0].published_local: >>> DateTime.new( 2016, 2, 9, 14, 22, 0, '-7') 41 | 42 | >>> pp feed.items[0].published 43 | >>> pp feed.items[0].published_local 44 | -------------------------------------------------------------------------------- /feeds/spec/json/tags.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "https://jsonfeed.org/version/1", 3 | "title": "My Example Feed", 4 | "home_page_url": "https://example.org/", 5 | "feed_url": "https://example.org/feed.json", 6 | "items": [ 7 | { 8 | "id": "2", 9 | "content_text": "This is a second item.", 10 | "url": "https://example.org/second-item", 11 | "tags": ["one", "two", "fourty four"] 12 | }, 13 | { 14 | "id": "1", 15 | "content_html": "

Hello, world!

", 16 | "url": "https://example.org/initial-post", 17 | "tags": ["compiler","dynamic dispatch","dynamic languages","ruby 2.0"] 18 | } 19 | ] 20 | } 21 | 22 | --- 23 | 24 | feed.items[0].tags.size: >>> 3 25 | feed.items[0].tags[0].name: one 26 | feed.items[0].tags[1].name: two 27 | feed.items[0].tags[2].name: fourty four 28 | 29 | feed.items[1].tags.size: >>> 4 30 | feed.items[1].tags[0].name: compiler 31 | feed.items[1].tags[1].name: dynamic dispatch 32 | feed.items[1].tags[2].name: dynamic languages 33 | feed.items[1].tags[3].name: ruby 2.0 34 | -------------------------------------------------------------------------------- /feeds/spec/microformats/hentry.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |

Microformats are amazing

5 |

Published by 6 | W. Developer 7 | on 8 | 9 |

In which I extoll the virtues of using microformats.

10 | 11 |
12 |

Blah blah blah

13 |
14 |
15 | -------------------------------------------------------------------------------- /feeds/spec/rss/author.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Author 5 | http://example.com/ 6 | 7 | Mon, 29 May 2017 18:51:30 GMT 8 | Matt Flegenheimer Jr. 9 | Matt Flegenheimer Sr. 10 | 11 | Jared Kushner’s Role Is Tested as Russia Case Grows 12 | https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html?partner=rss&emc=rss 13 | https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html 14 | Mon, 29 May 2017 01:23:57 GMT 15 | Peter Baker 16 | 17 | 18 | 19 | 20 | --- 21 | 22 | feed.authors.size: >>> 2 23 | feed.authors[0].text: Matt Flegenheimer Jr. 24 | feed.authors[0].to_s: Matt Flegenheimer Jr. 25 | feed.authors[0].email: >>> nil 26 | feed.authors[1].text: Matt Flegenheimer Sr. 27 | feed.authors[1].to_s: Matt Flegenheimer Sr. 28 | 29 | feed.author.text: Matt Flegenheimer Jr. 30 | feed.author.to_s: Matt Flegenheimer Jr. 
31 | feed.author.email: >>> nil 32 | 33 | 34 | feed.items[0].authors.size: >>> 1 35 | feed.items[0].authors[0].text: Peter Baker 36 | feed.items[0].authors[0].to_s: Peter Baker 37 | feed.items[0].authors[0].email: >>> nil 38 | 39 | feed.items[0].author.text: Peter Baker 40 | feed.items[0].author.to_s: Peter Baker 41 | feed.items[0].author.email: >>> nil 42 | -------------------------------------------------------------------------------- /feeds/spec/rss/categories.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Categories 5 | http://example.com 6 | 7 | Mon, 29 May 2017 18:51:30 GMT 8 | United States Politics and Government 9 | Social Media 10 | News and News Media 11 | 12 | 13 | Jared Kushner’s Role Is Tested as Russia Case Grows 14 | https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html?partner=rss&emc=rss 15 | https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html 16 | It is unclear how Jared Kushner’s high-profile woes will affect his hard-won partnership with his father-in-law, perhaps the most stable in an often unstable White House. 17 | Mon, 29 May 2017 01:23:57 GMT 18 | Presidents and Presidency (US) 19 | Appointments and Executive Changes 20 | United States 21 | Jared Kushner 22 | Donald J. Trump 23 | 24 | 25 | 26 | Reported Talks by Jared Kushner With Russia Would Be ‘Good Thing,’ Trump Official Says 27 | https://www.nytimes.com/2017/05/28/us/politics/trump-returns-to-us-and-to-berating-newsmedia-on-twitter.html?partner=rss&emc=rss 28 | https://www.nytimes.com/2017/05/28/us/politics/trump-returns-to-us-and-to-berating-newsmedia-on-twitter.html 29 | John F. Kelly, the homeland security secretary, played down reports of a back channel. The president, meanwhile, resumed using Twitter as a weapon. 
30 | Sun, 28 May 2017 16:57:05 GMT 31 | Trump, Donald J 32 | United States Politics and Government 33 | Social Media 34 | News and News Media 35 | Twitter 36 | 37 | 38 | 39 | 40 | --- 41 | 42 | feed.tags.size: >>> 3 43 | feed.tags[0].name: United States Politics and Government 44 | feed.tags[0].domain: http://www.nytimes.com/namespaces/keywords/des 45 | feed.tags[1].name: Social Media 46 | feed.tags[2].name: News and News Media 47 | 48 | 49 | feed.items[0].tags.size: >>> 5 50 | feed.items[0].tags[0].name: Presidents and Presidency (US) 51 | feed.items[0].tags[0].domain: http://www.nytimes.com/namespaces/keywords/mdes 52 | feed.items[0].tags[1].name: Appointments and Executive Changes 53 | feed.items[0].tags[2].name: United States 54 | feed.items[0].tags[3].name: Jared Kushner 55 | feed.items[0].tags[4].name: Donald J. Trump 56 | 57 | 58 | feed.items[1].tags.size: >>> 5 59 | feed.items[1].tags[0].name: Trump, Donald J 60 | feed.items[1].tags[0].domain: http://www.nytimes.com/namespaces/keywords/nyt_per 61 | feed.items[1].tags[1].name: United States Politics and Government 62 | feed.items[1].tags[2].name: Social Media 63 | feed.items[1].tags[3].name: News and News Media 64 | feed.items[1].tags[4].name: Twitter 65 | -------------------------------------------------------------------------------- /feeds/spec/rss/creator.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Dublin Core 5 | http://example.com/ 6 | 7 | Mon, 29 May 2017 18:51:30 GMT 8 | Matt Flegenheimer 9 | 10 | Jared Kushner’s Role Is Tested as Russia Case Grows 11 | https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html?partner=rss&emc=rss 12 | https://www.nytimes.com/2017/05/28/us/kushner-trump-relationship-russia-investigation.html 13 | Mon, 29 May 2017 01:23:57 GMT 14 | Peter Baker 15 | 16 | 17 | 18 | 19 | --- 20 | 21 | feed.authors.size: >>> 1 22 | feed.authors[0].text: Matt Flegenheimer 23 | 
feed.authors[0].to_s: Matt Flegenheimer 24 | feed.authors[0].email: >>> nil 25 | 26 | feed.author.text: Matt Flegenheimer 27 | feed.author.to_s: Matt Flegenheimer 28 | feed.author.email: >>> nil 29 | 30 | 31 | feed.items[0].authors.size: >>> 1 32 | feed.items[0].authors[0].text: Peter Baker 33 | feed.items[0].authors[0].to_s: Peter Baker 34 | feed.items[0].authors[0].email: >>> nil 35 | 36 | feed.items[0].author.text: Peter Baker 37 | feed.items[0].author.to_s: Peter Baker 38 | feed.items[0].author.email: >>> nil 39 | -------------------------------------------------------------------------------- /feeds/test/helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | require 'logutils' 5 | require 'textutils' 6 | 7 | # note: for now use "packaged" version in gem 8 | # (not the source in ../feedparser/lib) 9 | require 'feedparser' 10 | 11 | # note: add microformats support (optional) 12 | require 'microformats' 13 | 14 | 15 | 16 | require 'minitest/autorun' 17 | 18 | 19 | LogUtils::Logger.root.level = :debug 20 | 21 | 22 | 23 | def walk(root, &block) 24 | Dir.foreach(root) do |name| 25 | ## puts "name: #{name}" 26 | path = File.join(root, name) 27 | 28 | if name == '.' || name == '..' 29 | next 30 | elsif File.directory?( path ) 31 | 32 | ## note: skip .git !! 33 | ## test folder with ruby test scripts 34 | next if ['.git', 'test'].include?( name ) 35 | 36 | puts "** directory: #{path}/" 37 | walk( path, &block ) 38 | else 39 | puts " #{name}" 40 | block.call( path ) ## same as yield( path ) 41 | end 42 | end 43 | end 44 | 45 | 46 | ## add custom assert 47 | module MiniTest 48 | class Test 49 | 50 | 51 | ## note: 52 | ## regex excape bracket: [ to \[ 53 | ## \\ needs to get escaped twice e.g. 
(\\ becomes \) 54 | TXT_BEGIN = "\\[\\[" 55 | TXT_END = "\\]\\]" 56 | 57 | 58 | def assert_feed( text, tests, opts={} ) 59 | 60 | 61 | name = opts[:name] || '' 62 | 63 | 64 | feed = FeedParser::Parser.parse( text ) 65 | 66 | ################################################## 67 | ## pass 1: remove blank lines & comment lines 68 | 69 | lines = [] 70 | 71 | tests.each_line do |line| 72 | line = line.strip 73 | 74 | if line.start_with? '#' 75 | next ## skip comment lines too 76 | end 77 | 78 | 79 | if line == '__END__' 80 | break ## support end of file marker (skip/ignore all lines after __END__) 81 | end 82 | 83 | lines << line 84 | end 85 | 86 | 87 | ######################################### 88 | ## pass 2: "fold" multi-line items 89 | ## e.g. 90 | ## feed.items[0].description: [[ 91 | ## In the United States, the social media giant has been an advocate of equal treatment of all Internet content. 92 | ## In India, regulators who share that belief have effectively blocked a free Facebook service. 93 | ## ]] 94 | ## becomes =>: 95 | ## feed.items[0].description: In the United States, the social media giant has been an advocate of equal treatment of all Internet content. In India, regulators who share that belief have effectively blocked a free Facebook service. 96 | ## 97 | 98 | ## 99 | ## use [[> (instead of just [[) to mark string as to preserve newlines 100 | ## or [[| |]] (two brackets with pipe??) or [[[ ]]] (three brackets) - why? why not? 101 | ## or use python style """ and """" - why? why not? 
102 | 103 | 104 | ####### 105 | ## note: preserve blank lines in multi-line "verbatim" items 106 | ## 107 | 108 | lines_ii = [] 109 | buf = '' 110 | inside_txt = false 111 | 112 | lines.each do |line| 113 | 114 | if inside_txt == false 115 | 116 | if line =~ /#{TXT_BEGIN}/ 117 | s = StringScanner.new( line ) 118 | expr = s.scan_until( /(?=#{TXT_BEGIN})/ ) 119 | _ = s.scan( /#{TXT_BEGIN}/ ) 120 | value = s.rest 121 | 122 | buf = '' # reset 123 | buf << expr.strip # add expresion before TXT_BEGIN 124 | 125 | if value.nil? || value.strip.empty? 126 | # add nothing ;-) 127 | else 128 | buf << ' ' 129 | buf << value.strip 130 | end 131 | inside_txt = true 132 | else 133 | if line =~ /^[ \t]*$/ 134 | next ## skip blank lines (NOT in "verbatim" multi-line string blocks) 135 | end 136 | 137 | lines_ii << line # copy as is 1:1 138 | end 139 | else ## inside_txt == true 140 | if line =~ /#{TXT_END}/ 141 | s = StringScanner.new( line ) 142 | value = s.scan_until( /(?=#{TXT_END})/ ) 143 | _ = s.scan( /#{TXT_END}/ ) 144 | _ = s.rest 145 | 146 | if value.strip.empty? 147 | # add nothing ;-) 148 | else 149 | buf << ' ' 150 | buf << value.strip 151 | end 152 | lines_ii << buf ## add "folded" line 153 | inside_txt = false 154 | else 155 | if line.strip.empty? 156 | ## empty lines get skipped for now => add support for mode with preserved newlines why? why not??? 157 | else 158 | buf << " " ## note: newline converter to just one space 159 | buf << line.strip 160 | end 161 | end 162 | end 163 | end # each lines 164 | 165 | 166 | 167 | ######################################### 168 | ## pass 3: eval asserts, finally ;-) 169 | 170 | lines_ii.each do |line| 171 | 172 | if line.start_with? '>>>' 173 | ## for debugging allow "custom" code e.g. >>> pp feed.items[0].summary etc. 
174 | code = line[3..-1].strip 175 | msg = "eval in #{name}: >>> #{code}" 176 | else 177 | pos = line.index(':') ## assume first colon (:) is separator 178 | expr = line[0...pos].strip ## NOTE: do NOT include colon (thus, use tree dots ...) 179 | value = line[pos+1..-1].strip 180 | 181 | ## for ruby code use |> or >> or >>> or => or $ or | or RUN or ???? 182 | ## otherwise assume "literal" string 183 | 184 | if value.start_with? '>>>' 185 | value = value[3..-1].strip 186 | msg = "assert in #{name}: >>> #{expr} == #{value}" 187 | 188 | if value == 'nil' 189 | code = "assert_nil #{expr}, %{#{msg}}" ## note: use assert_nil for nils 190 | else 191 | code = "assert_equal #{value}, #{expr}, %{#{msg}}" 192 | end 193 | else # assume value is a "plain" string 194 | ## note use %{ } so value can include quotes ('') etc. 195 | msg = %{assert in #{name}: #{expr} == "#{value}"} 196 | code = "assert_equal %{#{value}}, #{expr}, %{#{msg}}" 197 | end 198 | end 199 | 200 | puts msg 201 | eval( code ) 202 | end # each line 203 | end 204 | 205 | end 206 | end # module MiniTest 207 | -------------------------------------------------------------------------------- /feeds/test/test_feeds.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./test test/test_feeds.rb 6 | # or better 7 | # rake test 8 | 9 | require 'helper' 10 | 11 | 12 | class TestFeeds < MiniTest::Test 13 | 14 | def test_all 15 | walk_feeds( '.' ) 16 | 17 | ## parse_feeds( './news/guardian*' ) 18 | ## parse_feeds( './news/nytimes-blogs*' ) 19 | end # method test_all 20 | 21 | 22 | 23 | private 24 | 25 | def walk_feeds( root='.' ) 26 | walk( root ) do |path| 27 | 28 | ## note: skip README, Rakefile etc. 29 | ## check for extensions 30 | extname = File.extname( path ) # note: includes dot e.g. .json etc. 
31 | next unless ['.json', '.html', '.xml', '.rss', '.rss2', '.atom'].include?( extname ) 32 | 33 | parse_feed( path ) 34 | end 35 | end # walk_feeds 36 | 37 | 38 | def parse_feeds( pattern ) 39 | files = Dir.glob( pattern ) 40 | files.each do |path| 41 | puts " #{path}" 42 | parse_feed( path ) 43 | end 44 | end 45 | 46 | 47 | def parse_feed( path ) 48 | puts " reading #{path} ..." 49 | 50 | b = BlockReader.from_file( path ).read 51 | 52 | ## puts " [debug] block.size: #{b.size}" 53 | text = b[0] ## block I: feed source text (xml, json, html, etc.) 54 | tests = b[1] ## block II: test assert source 55 | 56 | if tests.nil? 57 | puts "!!!! test asserts missing in #{path} !!!" 58 | ## exit 1 59 | else 60 | assert_feed( text, tests, name: path ) 61 | end 62 | end 63 | 64 | end # class TestFeeds 65 | -------------------------------------------------------------------------------- /feedtxt.specs/README.md: -------------------------------------------------------------------------------- 1 | 2 | {% include header.html %} 3 | 4 | 7 | 8 | # Feed.TXT - A Free Feeds Format in Plain Text w/ Structured Meta Data 9 | 10 | 11 | What's Feed.TXT? Let's start with an example from the JSON Feed spec: 12 | 13 | ```json 14 | { 15 | "version": "https://jsonfeed.org/version/1", 16 | "title": "My Example Feed", 17 | "home_page_url": "https://example.org/", 18 | "feed_url": "https://example.org/feed.json", 19 | "items": [ 20 | { 21 | "id": "2", 22 | "content_text": "This is a second item.", 23 | "url": "https://example.org/second-item" 24 | }, 25 | { 26 | "id": "1", 27 | "content_html": "

Hello, world!

", 28 | "url": "https://example.org/initial-post" 29 | } 30 | ] 31 | } 32 | ``` 33 | 34 | Simple, isn't it? Let's try just text: 35 | 36 | ``` 37 | |>>> 38 | title: My Example Feed 39 | home_page_url: https://example.org/ 40 | feed_url: https://example.org/feed.txt 41 | 42 | id: 2 43 | url: https://example.org/second-item 44 | --- 45 | This is a second item. 46 | 47 | id: 1 48 | url: https://example.org/initial-post 49 | --- 50 | Hello, world! 51 | <<<| 52 | ``` 53 | 54 | Are you serious, really? Let's try another example from the JSON Feed spec: 55 | 56 | ```json 57 | { 58 | "version": "https://jsonfeed.org/version/1", 59 | "user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json", 60 | "title": "The Record", 61 | "home_page_url": "http://therecord.co/", 62 | "feed_url": "http://therecord.co/feed.json", 63 | "items": [ 64 | { 65 | "id": "http://therecord.co/chris-parrish", 66 | "title": "Special #1 - Chris Parrish", 67 | "url": "http://therecord.co/chris-parrish", 68 | "content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.", 69 | "content_html": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. 
He lives on Bainbridge Island, a quick ferry ride from Seattle.", 70 | "summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.", 71 | "date_published": "2014-05-09T14:04:00-07:00", 72 | "attachments": [ 73 | { 74 | "url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a", 75 | "mime_type": "audio/x-m4a", 76 | "size_in_bytes": 89970236, 77 | "duration_in_seconds": 6629 78 | } 79 | ] 80 | } 81 | ] 82 | } 83 | ``` 84 | 85 | Yes, the world's 1st podcasting feed in plain text ;-) Let's try: 86 | 87 | ``` 88 | |>>> 89 | comment: This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json 90 | title: The Record 91 | home_page_url: http://therecord.co/ 92 | feed_url: http://therecord.co/feed.txt 93 | 94 | id: http://therecord.co/chris-parrish 95 | title: Special #1 - Chris Parrish 96 | url: http://therecord.co/chris-parrish 97 | summary: Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled. 98 | published: 2014-05-09T14:04:00-07:00 99 | attachments: 100 | - url: http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a 101 | mime_type: audio/x-m4a 102 | size_in_bytes: 89970236 103 | duration_in_seconds: 6629 104 | --- 105 | Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. 106 | Chris's new company is Aged & Distilled with Guy English — which shipped [Napkin](2), 107 | a Mac app for visual collaboration. Chris is also the co-host of The Record. 108 | He lives on [Bainbridge Island][3], a quick ferry ride from Seattle. 109 | 110 | [1]: http://adobe.com/ 111 | [2]: http://aged-and-distilled.com/napkin/ 112 | [3]: http://www.ci.bainbridge-isl.wa.us/ 113 | <<<| 114 | ``` 115 | 116 | 117 | ## Spec(ification) - How does it work? 118 | 119 | A Feed.txt starts with a meta data block for the feed in YAML format 120 | followed by a list of items. 
Items start with a meta data block followed by the text 121 | using the markdown formatting conventions for structured text (headings, lists, tables, etc.) and 122 | hyperlinks. That's it. 123 | 124 | 125 | ### Dividers - Begin / Next / End 126 | 127 | Use `|>>>` to begin a Feed.txt feed. Note that you can use three or more `>>>` open brackets e.g. 128 | `|>>>>>>>>>>>>` also works. 129 | 130 | Use `<<<|` to end a Feed.txt feed. Again note that you can use three or more `<<<` closing brackets e.g. 131 | `<<<<<<<|` also works. 132 | 133 | Use `</>` to break up items. That's it. 134 | 135 | 136 | 137 | 138 | ## Use JSON / JSON5 / HJSON / SON for Structured Meta Data - |{ }| 139 | 140 | As an alternative you can use human JSON for meta data blocks. Let's try: 141 | 142 | ``` 143 | |{ 144 | title: "My Example Feed" 145 | home_page_url: "https://example.org/" 146 | feed_url: "https://example.org/feed.txt" 147 | }/{ 148 | id: "2" 149 | url: "https://example.org/second-item" 150 | } 151 | This is a second item. 152 | }/{ 153 | id: "1" 154 | url: "https://example.org/initial-post" 155 | } 156 | Hello, world! 157 | }| 158 | ``` 159 | 160 | Are you joking? Don't like the more human JSON style? Let's retry in "classic" JSON: 161 | 162 | ``` 163 | |{ 164 | "title": "My Example Feed", 165 | "home_page_url": "https://example.org/", 166 | "feed_url": "https://example.org/feed.txt" 167 | }/{ 168 | "id": "2", 169 | "url": "https://example.org/second-item" 170 | } 171 | This is a second item. 172 | }/{ 173 | "id": "1", 174 | "url": "https://example.org/initial-post" 175 | } 176 | Hello, world! 177 | }| 178 | ``` 179 | 180 | ### Dividers - Begin / Next / End (JSON Edition) 181 | 182 | Change `|>>>` to `|{` to begin a Feed.txt feed. Note that you can use one or more `{` open curly brackets e.g. `|{%raw%}{{{{{%endraw%}` also works. 183 | 184 | Change `<<<|` to `}|` to end a Feed.txt feed. Again note that you can use one or more `}` closing brackets e.g. `{%raw%}}}}}{%endraw%}|` also works.
185 | 186 | Change `</>` to `}/{` to break up items. That's it. 187 | 188 | 189 | Sorry, there's no XML alternative ;-) 190 | 191 | 192 | ## License 193 | 194 | The Feed.TXT format and conventions are dedicated to the public domain. 195 | Use it as you please with no restrictions whatsoever. 196 | 197 | ## Questions? Comments? 198 | 199 | Send them along to the [wwwmake mailing list/forum](http://groups.google.com/group/wwwmake). Thanks. 200 | 201 | 202 | 203 | 204 | Brought to you by [Manuscripts](https://github.com/manuscripts) and friends. You might also like [Bib.TXT](http://bibtxt.github.io) ;-). 205 | 206 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /feedtxt.specs/_includes/header.html: -------------------------------------------------------------------------------- 1 | 4 | 5 | 8 | 9 | 17 | 18 | 22 | 25 | -------------------------------------------------------------------------------- /feedtxt/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | /.config 4 | /coverage/ 5 | /InstalledFiles 6 | /pkg/ 7 | /spec/reports/ 8 | /spec/examples.txt 9 | /test/tmp/ 10 | /test/version_tmp/ 11 | /tmp/ 12 | 13 | # Used by dotenv library to load environment variables. 14 | # .env 15 | 16 | ## Specific to RubyMotion: 17 | .dat* 18 | .repl_history 19 | build/ 20 | *.bridgesupport 21 | build-iPhoneOS/ 22 | build-iPhoneSimulator/ 23 | 24 | ## Specific to RubyMotion (use of CocoaPods): 25 | # 26 | # We recommend against adding the Pods directory to your .gitignore.
However 27 | # you should judge for yourself, the pros and cons are mentioned at: 28 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 29 | # 30 | # vendor/Pods/ 31 | 32 | ## Documentation cache and generated files: 33 | /.yardoc/ 34 | /_yardoc/ 35 | /doc/ 36 | /rdoc/ 37 | 38 | ## Environment normalization: 39 | /.bundle/ 40 | /vendor/bundle 41 | /lib/bundler/man/ 42 | 43 | # for a library or gem, you might want to ignore these files since the code is 44 | # intended to run in multiple environments; otherwise, check them in: 45 | # Gemfile.lock 46 | # .ruby-version 47 | # .ruby-gemset 48 | 49 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 50 | .rvmrc 51 | -------------------------------------------------------------------------------- /feedtxt/HISTORY.md: -------------------------------------------------------------------------------- 1 | 2 | ### 0.0.1 / 2017-06-11 3 | 4 | * Everything is new. First release 5 | -------------------------------------------------------------------------------- /feedtxt/Manifest.txt: -------------------------------------------------------------------------------- 1 | HISTORY.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | lib/feedtxt.rb 6 | lib/feedtxt/parser.rb 7 | lib/feedtxt/parser/ini.rb 8 | lib/feedtxt/parser/json.rb 9 | lib/feedtxt/parser/yaml.rb 10 | lib/feedtxt/version.rb 11 | test/feeds/spec/example.ini.txt 12 | test/feeds/spec/example.json.txt 13 | test/feeds/spec/example.yaml.txt 14 | test/feeds/spec/podcast.ini.txt 15 | test/feeds/spec/podcast.json.txt 16 | test/feeds/spec/podcast.yaml.txt 17 | test/helper.rb 18 | test/test_ini.rb 19 | test/test_json.rb 20 | test/test_scanner.rb 21 | test/test_yaml.rb 22 | -------------------------------------------------------------------------------- /feedtxt/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require 
'./lib/feedtxt/version.rb' 3 | 4 | Hoe.spec 'feedtxt' do 5 | 6 | self.version = Feedtxt::VERSION 7 | 8 | self.summary = "feedtxt - reads Feed.TXT a.k.a. RSS (Really Simple Sharing) 5.0 ;-) - feeds in text (unicode) - publish & share posts, articles, podcasts, 'n' more" 9 | self.description = summary 10 | 11 | self.urls = ['https://github.com/feedtxt/feedtxt'] 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'wwwmake@googlegroups.com' 15 | 16 | # switch extension to .markdown for gihub formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'HISTORY.md' 19 | 20 | self.licenses = ['Public Domain'] 21 | 22 | ### todo 23 | ## add deps e.g. props gem for INI.load 24 | 25 | 26 | self.spec_extras = { 27 | required_ruby_version: '>= 1.9.2' 28 | } 29 | 30 | end 31 | -------------------------------------------------------------------------------- /feedtxt/lib/feedtxt.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | # core and stdlibs 5 | 6 | require 'strscan' ## StringScanner 7 | require 'json' 8 | require 'yaml' 9 | require 'date' 10 | require 'time' 11 | require 'pp' 12 | 13 | 14 | 15 | # 3rd party gems/libs 16 | require 'logutils' 17 | require 'props' ## used for IniFile.parse 18 | 19 | 20 | # our own code 21 | require 'feedtxt/version' # let it always go first 22 | require 'feedtxt/parser' 23 | require 'feedtxt/parser/json' 24 | require 'feedtxt/parser/yaml' 25 | require 'feedtxt/parser/ini' 26 | 27 | 28 | 29 | ## add shortcut / alias e.g. 
30 | ## lets you use: 31 | ## Feedtxt.parse instead of Feedtxt::Parser.parse 32 | module Feedtxt 33 | def self.parse( text, opts={} ) 34 | Parser.parse( text, ) 35 | end 36 | 37 | INI = IniParser ## note: add a shortcut; lets you use Feedtxt::INI.parse 38 | YAML = YamlParser ## note: add a shortcut; lets you use Feedtxt::YAML.parse 39 | JSON = JsonParser ## note: add a shortcut; lets you use Feedttxt::JSON.parse 40 | end 41 | 42 | 43 | 44 | # say hello 45 | puts Feedtxt.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG) 46 | -------------------------------------------------------------------------------- /feedtxt/lib/feedtxt/parser.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Feedtxt 4 | 5 | 6 | class Parser 7 | 8 | include LogUtils::Logging 9 | 10 | 11 | ### convenience class/factory method 12 | def self.parse( text, opts={} ) 13 | self.new( text ).parse 14 | end 15 | 16 | ### Note: lets keep/use same API as RSS::Parser for now 17 | def initialize( text ) 18 | @text = text 19 | end 20 | 21 | 22 | def parse 23 | ## auto-detect format 24 | ## use "best" matching format (e.g. first match by pos(ition)) 25 | 26 | klass = YAML ## default to yamlparser for now 27 | pos = 9_999_999 ## todo:use MAX INTEGER or something!! 28 | 29 | json = @text.index( /#{JSON::FEED_BEGIN}/ ) 30 | if json # found e.g. not nil? incl. 0 31 | pos = json 32 | klass = JSON 33 | end 34 | 35 | ini = @text.index( /#{INI::FEED_BEGIN}/ ) 36 | if ini && ini < pos # found e.g. not nil? and match before last? 37 | pos = ini 38 | klass = INI 39 | end 40 | 41 | yaml = @text.index( /#{YAML::FEED_BEGIN}/ ) 42 | if yaml && yaml < pos # found e.g. not nil? and match before last? 
43 | pos = yaml 44 | klass = YAML 45 | end 46 | 47 | feed = klass.parse( @text ) 48 | feed 49 | end # method parse 50 | 51 | end # class Parser 52 | 53 | end # module Feedtxt 54 | -------------------------------------------------------------------------------- /feedtxt/lib/feedtxt/parser/ini.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Feedtxt 4 | 5 | 6 | class IniParser 7 | 8 | include LogUtils::Logging 9 | 10 | 11 | ### convenience class/factory method 12 | def self.parse( text, opts={} ) 13 | self.new( text ).parse 14 | end 15 | 16 | ### Note: lets keep/use same API as RSS::Parser for now 17 | def initialize( text ) 18 | @text = text 19 | end 20 | 21 | 22 | 23 | ## note: 24 | ## regex excape bracket: [ to \[ 25 | ## \\ needs to get escaped twice e.g. (\\ becomes \) 26 | ## e.g. [>>> or [>>>>> 27 | FEED_BEGIN = "^[ ]*\\[>>>+[ ]*$" ## note: allow leading n trailing spaces; allow 3 or more brackets 28 | ## e.g. <<<] or <<<<<<] 29 | FEED_END = "^[ ]*<<<+\\][ ]*$" ## note: allow leading n trailing spaces; allow 3 or more brackets 30 | 31 | ## e.g. or <<>> 32 | FEED_NEXT = "^[ ]*<+/>+[ ]*$" ## pass 1: split/break up blocks 33 | ## e.g. --- or ----- 34 | FEED_META = "^[ ]*---+[ ]*$" ## pass 2: break up item into metadata and content block 35 | 36 | 37 | 38 | def parse 39 | 40 | ## find start marker e.g. [>>> 41 | ## use regex - allow three or more >>>>>> or <<<<<< 42 | ## allow spaces before and after 43 | 44 | s = StringScanner.new( @text ) 45 | 46 | prolog = s.scan_until( /(?=#{FEED_BEGIN})/ ) 47 | ## pp prolog 48 | 49 | feed_begin = s.scan( /#{FEED_BEGIN}/ ) 50 | if feed_begin.empty? ## use blank? why? why not?? 51 | ## nothing found return empty array for now; return nil - why? why not? 52 | puts "warn !!! no begin marker found e.g. 
|>>>" 53 | return [] 54 | end 55 | 56 | 57 | buf = s.scan_until( /(?=#{FEED_END})/ ) 58 | buf = buf.strip # remove leading and trailing whitespace 59 | 60 | feed_end = s.scan( /#{FEED_END}/ ) 61 | if feed_end.empty? ## use blank? why? why not?? 62 | ## nothing found return empty array for now; return nil - why? why not? 63 | puts "warn !!! no end marker found e.g. <<<|" 64 | return [] 65 | end 66 | 67 | 68 | #### 69 | ## pass 1: split blocks by 70 | ### note: allows <<<>>> 71 | 72 | blocks = buf.split( /#{FEED_NEXT}/ ) 73 | ## pp blocks 74 | 75 | ## 1st block is feed meta data 76 | block1st = blocks.shift ## get/remove 1st block from blocks 77 | block1st = block1st.strip ## strip leading and trailing whitespace 78 | feed_metadata = ::INI.load( block1st ) 79 | 80 | feed_items = [] 81 | blocks.each do |block| 82 | ### note: do NOT use split e.g.--- is used by markdown 83 | ## only search for first --- to split (all others get ignored) 84 | ## todo: make three dashes --- (3) not hard-coded (allow more) 85 | 86 | s2 = StringScanner.new( block ) 87 | 88 | item_metadata = s2.scan_until( /(?=#{FEED_META})/ ) 89 | item_metadata = item_metadata.strip # remove leading and trailing whitespace 90 | item_metadata = ::INI.load( item_metadata ) ## convert to hash with inifile parser 91 | 92 | feed_meta = s2.scan( /#{FEED_META}/ ) 93 | 94 | item_content = s2.rest 95 | item_content = item_content.strip # remove leading and trailing whitespace 96 | 97 | feed_items << [item_metadata, item_content] 98 | end 99 | 100 | [ feed_metadata, feed_items ] 101 | end # method parse 102 | 103 | 104 | end # class IniParser 105 | 106 | 107 | end # module Feedtxt 108 | -------------------------------------------------------------------------------- /feedtxt/lib/feedtxt/parser/json.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Feedtxt 4 | 5 | 6 | class JsonParser 7 | 8 | include LogUtils::Logging 9 | 10 | 11 | ### 
convenience class/factory method 12 | def self.parse( text, opts={} ) 13 | self.new( text ).parse 14 | end 15 | 16 | ### Note: lets keep/use same API as RSS::Parser for now 17 | def initialize( text ) 18 | @text = text 19 | end 20 | 21 | 22 | 23 | ## note: 24 | ## regex excape pipe: | to \| 25 | ## \\ needs to get escaped twice e.g. (\\ becomes \) 26 | ## e.g. |{ or |{{{ 27 | FEED_BEGIN = "^[ ]*\\|{+[ ]*$" ## note: allow leading n trailing spaces; allow 3 or more brackets 28 | ## e.g. }| or }}}| 29 | FEED_END = "^[ ]*}+\\|[ ]*$" ## note: allow leading n trailing spaces; allow 3 or more brackets 30 | 31 | ## e.g.}/{ or }}}/{{{ 32 | ## todo/check: also allow }///{ or } /// { why,why not? 33 | FEED_NEXT = "^[ ]*}+/{+[ ]*$" ## pass 1: split/break up blocks 34 | 35 | ## e.g. }---{ or }}}---{{{ or }-{ 36 | ## todo/check: also allow }.{ with dot why? why not? 37 | ## also allow } - { or } ---- { why? why not? 38 | FEED_META = "^[ ]*}+-+{+[ ]*$" ## pass 2: break up item into metadata and content block 39 | 40 | 41 | 42 | def parse 43 | 44 | ## find start marker e.g. |>>> 45 | ## use regex - allow three or more >>>>>> or <<<<<< 46 | ## allow spaces before and after 47 | 48 | s = StringScanner.new( @text ) 49 | 50 | prolog = s.scan_until( /(?=#{FEED_BEGIN})/ ) 51 | ## pp prolog 52 | 53 | feed_begin = s.scan( /#{FEED_BEGIN}/ ) 54 | if feed_begin.empty? ## use blank? why? why not?? 55 | ## nothing found return empty array for now; return nil - why? why not? 56 | puts "warn !!! no begin marker found e.g. |>>>" 57 | return [] 58 | end 59 | 60 | 61 | buf = s.scan_until( /(?=#{FEED_END})/ ) 62 | buf = buf.strip # remove leading and trailing whitespace 63 | 64 | feed_end = s.scan( /#{FEED_END}/ ) 65 | if feed_end.empty? ## use blank? why? why not?? 66 | ## nothing found return empty array for now; return nil - why? why not? 67 | puts "warn !!! no end marker found e.g. 
<<<|" 68 | return [] 69 | end 70 | 71 | 72 | #### 73 | ## pass 1: split blocks by }/{ 74 | ### note: allows }}}/{{{ 75 | 76 | blocks = buf.split( /#{FEED_NEXT}/ ) 77 | ## pp blocks 78 | 79 | ## 1st block is feed meta data 80 | block1st = blocks.shift ## get/remove 1st block from blocks 81 | block1st = block1st.strip # remove leading and trailing whitespaces 82 | feed_metadata = ::JSON.parse( "{ #{block1st} }" ) 83 | 84 | feed_items = [] 85 | blocks.each do |block| 86 | ### note: do NOT use split e.g.--- is used by markdown 87 | ## only search for first --- to split (all others get ignored) 88 | ## todo: make three dashes --- (3) not hard-coded (allow more) 89 | 90 | s2 = StringScanner.new( block ) 91 | 92 | item_metadata = s2.scan_until( /(?=#{FEED_META})/ ) 93 | item_metadata = item_metadata.strip # remove leading and trailing whitespace 94 | item_metadata = ::JSON.parse( "{ #{item_metadata} }" ) ## convert to hash with yaml 95 | 96 | feed_meta = s2.scan( /#{FEED_META}/ ) 97 | 98 | item_content = s2.rest 99 | item_content = item_content.strip # remove leading and trailing whitespace 100 | 101 | feed_items << [item_metadata, item_content] 102 | end 103 | 104 | [ feed_metadata, feed_items ] 105 | end # method parse 106 | 107 | 108 | end # class JsonParser 109 | 110 | end # module Feedtxt 111 | -------------------------------------------------------------------------------- /feedtxt/lib/feedtxt/parser/yaml.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Feedtxt 4 | 5 | 6 | class YamlParser 7 | 8 | include LogUtils::Logging 9 | 10 | 11 | ### convenience class/factory method 12 | def self.parse( text, opts={} ) 13 | self.new( text ).parse 14 | end 15 | 16 | ### Note: lets keep/use same API as RSS::Parser for now 17 | def initialize( text ) 18 | @text = text 19 | end 20 | 21 | 22 | 23 | ## note: 24 | ## regex excape pipe: | to \| 25 | ## \\ needs to get escaped twice e.g. 
(\\ becomes \) 26 | ## e.g. |>>> or |>>>>> 27 | FEED_BEGIN = "^[ ]*\\|>>>+[ ]*$" ## note: allow leading n trailing spaces; allow 3 or more brackets 28 | ## e.g. <<<| or <<<<<<| 29 | FEED_END = "^[ ]*<<<+\\|[ ]*$" ## note: allow leading n trailing spaces; allow 3 or more brackets 30 | 31 | ## e.g. or <<>> 32 | FEED_NEXT = "^[ ]*<+/>+[ ]*$" ## pass 1: split/break up blocks 33 | ## e.g. --- or ----- 34 | FEED_META = "^[ ]*---+[ ]*$" ## pass 2: break up item into metadata and content block 35 | 36 | 37 | 38 | def parse 39 | 40 | ## find start marker e.g. |>>> 41 | ## use regex - allow three or more >>>>>> or <<<<<< 42 | ## allow spaces before and after 43 | 44 | s = StringScanner.new( @text ) 45 | 46 | prolog = s.scan_until( /(?=#{FEED_BEGIN})/ ) 47 | ## pp prolog 48 | 49 | feed_begin = s.scan( /#{FEED_BEGIN}/ ) 50 | if feed_begin.empty? ## use blank? why? why not?? 51 | ## nothing found return empty array for now; return nil - why? why not? 52 | puts "warn !!! no begin marker found e.g. |>>>" 53 | return [] 54 | end 55 | 56 | 57 | buf = s.scan_until( /(?=#{FEED_END})/ ) 58 | buf = buf.strip # remove leading and trailing whitespace 59 | 60 | feed_end = s.scan( /#{FEED_END}/ ) 61 | if feed_end.empty? ## use blank? why? why not?? 62 | ## nothing found return empty array for now; return nil - why? why not? 63 | puts "warn !!! no end marker found e.g. 
<<<|" 64 | return [] 65 | end 66 | 67 | 68 | #### 69 | ## pass 1: split blocks by 70 | ### note: allows <<<>>> 71 | 72 | blocks = buf.split( /#{FEED_NEXT}/ ) 73 | ## pp blocks 74 | 75 | ## 1st block is feed meta data 76 | block1st = blocks.shift ## get/remove 1st block from blocks 77 | block1st = block1st.strip ## strip leading and trailing whitespace 78 | feed_metadata = ::YAML.load( block1st ) 79 | 80 | feed_items = [] 81 | blocks.each do |block| 82 | ### note: do NOT use split e.g.--- is used by markdown 83 | ## only search for first --- to split (all others get ignored) 84 | ## todo: make three dashes --- (3) not hard-coded (allow more) 85 | 86 | s2 = StringScanner.new( block ) 87 | 88 | item_metadata = s2.scan_until( /(?=#{FEED_META})/ ) 89 | item_metadata = item_metadata.strip # remove leading and trailing whitespace 90 | item_metadata = ::YAML.load( item_metadata ) ## convert to hash with yaml 91 | 92 | feed_meta = s2.scan( /#{FEED_META}/ ) 93 | 94 | item_content = s2.rest 95 | item_content = item_content.strip # remove leading and trailing whitespace 96 | 97 | feed_items << [item_metadata, item_content] 98 | end 99 | 100 | [ feed_metadata, feed_items ] 101 | end # method parse 102 | 103 | 104 | end # class YamlParser 105 | 106 | end # module Feedtxt 107 | -------------------------------------------------------------------------------- /feedtxt/lib/feedtxt/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Feedtxt 4 | 5 | MAJOR = 1 6 | MINOR = 0 7 | PATCH = 1 8 | VERSION = [MAJOR,MINOR,PATCH].join('.') 9 | 10 | def self.version 11 | VERSION 12 | end 13 | 14 | def self.banner 15 | "feedtxt/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]" 16 | end 17 | 18 | def self.root 19 | "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}" 20 | end 21 | 22 | end # module Feedtxt 23 | 
-------------------------------------------------------------------------------- /feedtxt/test/feeds/spec/example.ini.txt: -------------------------------------------------------------------------------- 1 | [>>> 2 | title = My Example Feed 3 | home_page_url = https://example.org/ 4 | feed_url = https://example.org/feed.txt 5 | 6 | id = 2 7 | url = https://example.org/second-item 8 | --- 9 | This is a second item. 10 | 11 | id = 1 12 | url = https://example.org/initial-post 13 | --- 14 | Hello, world! 15 | <<<] 16 | -------------------------------------------------------------------------------- /feedtxt/test/feeds/spec/example.json.txt: -------------------------------------------------------------------------------- 1 | |{ 2 | "title": "My Example Feed", 3 | "home_page_url": "https://example.org/", 4 | "feed_url": "https://example.org/feed.txt" 5 | }/{ 6 | "id": "2", 7 | "url": "https://example.org/second-item" 8 | }-{ 9 | This is a second item. 10 | }/{ 11 | "id": "1", 12 | "url": "https://example.org/initial-post" 13 | }-{ 14 | Hello, world! 15 | }| 16 | -------------------------------------------------------------------------------- /feedtxt/test/feeds/spec/example.yaml.txt: -------------------------------------------------------------------------------- 1 | |>>> 2 | title: "My Example Feed" 3 | home_page_url: "https://example.org/" 4 | feed_url: "https://example.org/feed.txt" 5 | 6 | id: "2" 7 | url: "https://example.org/second-item" 8 | --- 9 | This is a second item. 10 | 11 | id: "1" 12 | url: "https://example.org/initial-post" 13 | --- 14 | Hello, world! 15 | <<<| 16 | -------------------------------------------------------------------------------- /feedtxt/test/feeds/spec/podcast.ini.txt: -------------------------------------------------------------------------------- 1 | [>>> 2 | comment = This is a podcast feed. 
You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json 3 | title = The Record 4 | home_page_url = http://therecord.co/ 5 | feed_url = http://therecord.co/feed.txt 6 | 7 | id = http://therecord.co/chris-parrish 8 | title = Special #1 - Chris Parrish 9 | url = http://therecord.co/chris-parrish 10 | summary = Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled. 11 | published = 2014-05-09T14:04:00-07:00 12 | [attachments] 13 | url = http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a 14 | mime_type = audio/x-m4a 15 | size_in_bytes = 89970236 16 | duration_in_seconds = 6629 17 | --- 18 | Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. 19 | Chris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2), 20 | a Mac app for visual collaboration. Chris is also the co-host of The Record. 21 | He lives on [Bainbridge Island][3], a quick ferry ride from Seattle. 22 | 23 | [1]: http://adobe.com/ 24 | [2]: http://aged-and-distilled.com/napkin/ 25 | [3]: http://www.ci.bainbridge-isl.wa.us/ 26 | <<<] 27 | -------------------------------------------------------------------------------- /feedtxt/test/feeds/spec/podcast.json.txt: -------------------------------------------------------------------------------- 1 | |{ 2 | "comment": "This is a podcast feed. 
You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json", 3 | "title": "The Record", 4 | "home_page_url": "http://therecord.co/", 5 | "feed_url": "http://therecord.co/feed.txt" 6 | }/{ 7 | "id": "http://therecord.co/chris-parrish", 8 | "title": "Special #1 - Chris Parrish", 9 | "url": "http://therecord.co/chris-parrish", 10 | "summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.", 11 | "published": "2014-05-09T14:04:00-07:00", 12 | "attachments": [ 13 | { 14 | "url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a", 15 | "mime_type": "audio/x-m4a", 16 | "size_in_bytes": 89970236, 17 | "duration_in_seconds": 6629 18 | } 19 | ] 20 | }-{ 21 | Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. 22 | Chris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2), 23 | a Mac app for visual collaboration. Chris is also the co-host of The Record. 24 | He lives on [Bainbridge Island][3], a quick ferry ride from Seattle. 25 | 26 | [1]: http://adobe.com/ 27 | [2]: http://aged-and-distilled.com/napkin/ 28 | [3]: http://www.ci.bainbridge-isl.wa.us/ 29 | }| 30 | -------------------------------------------------------------------------------- /feedtxt/test/feeds/spec/podcast.yaml.txt: -------------------------------------------------------------------------------- 1 | |>>> 2 | comment: "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json" 3 | title: "The Record" 4 | home_page_url: "http://therecord.co/" 5 | feed_url: "http://therecord.co/feed.txt" 6 | 7 | id: "http://therecord.co/chris-parrish" 8 | title: "Special #1 - Chris Parrish" 9 | url: "http://therecord.co/chris-parrish" 10 | summary: "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled." 
11 | published: 2014-05-09T14:04:00-07:00 12 | attachments: 13 | - url: "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" 14 | mime_type: "audio/x-m4a" 15 | size_in_bytes: 89970236 16 | duration_in_seconds: 6629 17 | --- 18 | Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. 19 | Chris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2), 20 | a Mac app for visual collaboration. Chris is also the co-host of The Record. 21 | He lives on [Bainbridge Island][3], a quick ferry ride from Seattle. 22 | 23 | [1]: http://adobe.com/ 24 | [2]: http://aged-and-distilled.com/napkin/ 25 | [3]: http://www.ci.bainbridge-isl.wa.us/ 26 | <<<| 27 | -------------------------------------------------------------------------------- /feedtxt/test/helper.rb: -------------------------------------------------------------------------------- 1 | ## $:.unshift(File.dirname(__FILE__)) 2 | 3 | 4 | ## minitest setup 5 | 6 | require 'minitest/autorun' 7 | 8 | require 'logutils' 9 | require 'textutils' 10 | 11 | 12 | ## our own code 13 | require 'feedtxt' 14 | 15 | 16 | 17 | LogUtils::Logger.root.level = :debug 18 | 19 | 20 | def read_text( name ) 21 | text = File.read( "#{Feedtxt.root}/test/feeds/#{name}.txt" ) 22 | text 23 | end 24 | -------------------------------------------------------------------------------- /feedtxt/test/test_ini.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_ini.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | class TestIni < MiniTest::Test 11 | 12 | def test_example 13 | 14 | text = read_text( 'spec/example.ini' ) 15 | pp text 16 | 17 | exp = [ 18 | {"title"=>"My Example Feed", 19 | "home_page_url"=>"https://example.org/", 20 | "feed_url"=>"https://example.org/feed.txt"}, 21 | [[ 22 | {"id"=>"2", "url"=>"https://example.org/second-item"}, 23 | 
"This is a second item." 24 | ], 25 | [ 26 | {"id"=>"1", "url"=>"https://example.org/initial-post"}, 27 | "Hello, world!" 28 | ]]] 29 | 30 | assert_equal exp, Feedtxt::INI.parse( text ) 31 | assert_equal exp, Feedtxt.parse( text ) ## try shortcut alias too 32 | end 33 | 34 | def test_podcast 35 | 36 | text = read_text( 'spec/podcast.ini' ) 37 | pp text 38 | 39 | exp = [{"comment"=> 40 | "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json", 41 | "title"=>"The Record", 42 | "home_page_url"=>"http://therecord.co/", 43 | "feed_url"=>"http://therecord.co/feed.txt"}, 44 | [[{"id"=>"http://therecord.co/chris-parrish", 45 | "title"=>"Special", 46 | "url"=>"http://therecord.co/chris-parrish", 47 | "summary"=> 48 | "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.", 49 | "published"=>"2014-05-09T14:04:00-07:00", 50 | "attachments"=> 51 | {"url"=>"http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a", 52 | "mime_type"=>"audio/x-m4a", 53 | "size_in_bytes"=>"89970236", 54 | "duration_in_seconds"=>"6629"}}, 55 | "Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.\nChris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),\na Mac app for visual collaboration. 
Chris is also the co-host of The Record.\nHe lives on [Bainbridge Island][3], a quick ferry ride from Seattle.\n\n[1]: http://adobe.com/\n[2]: http://aged-and-distilled.com/napkin/\n[3]: http://www.ci.bainbridge-isl.wa.us/"]]] 56 | 57 | assert_equal exp, Feedtxt::INI.parse( text ) 58 | assert_equal exp, Feedtxt.parse( text ) ## try shortcut alias too 59 | end 60 | 61 | 62 | end # class TestIni 63 | -------------------------------------------------------------------------------- /feedtxt/test/test_json.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_json.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | class TestJson < MiniTest::Test 11 | 12 | def test_example 13 | 14 | text = read_text( 'spec/example.json' ) 15 | pp text 16 | 17 | exp = [ 18 | {"title"=>"My Example Feed", 19 | "home_page_url"=>"https://example.org/", 20 | "feed_url"=>"https://example.org/feed.txt"}, 21 | [[ 22 | {"id"=>"2", "url"=>"https://example.org/second-item"}, 23 | "This is a second item." 24 | ], 25 | [ 26 | {"id"=>"1", "url"=>"https://example.org/initial-post"}, 27 | "Hello, world!" 28 | ]]] 29 | 30 | assert_equal exp, Feedtxt::JSON.parse( text ) 31 | assert_equal exp, Feedtxt.parse( text ) ## try shortcut alias too 32 | end 33 | 34 | def test_podcast 35 | 36 | text = read_text( 'spec/podcast.json' ) 37 | pp text 38 | 39 | exp =[{"comment"=> 40 | "This is a podcast feed. 
You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json", 41 | "title"=>"The Record", 42 | "home_page_url"=>"http://therecord.co/", 43 | "feed_url"=>"http://therecord.co/feed.txt"}, 44 | [[{"id"=>"http://therecord.co/chris-parrish", 45 | "title"=>"Special #1 - Chris Parrish", 46 | "url"=>"http://therecord.co/chris-parrish", 47 | "summary"=> 48 | "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.", 49 | "published"=> "2014-05-09T14:04:00-07:00", 50 | "attachments"=> 51 | [{"url"=> 52 | "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a", 53 | "mime_type"=>"audio/x-m4a", 54 | "size_in_bytes"=>89970236, 55 | "duration_in_seconds"=>6629}]}, 56 | "Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.\nChris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),\na Mac app for visual collaboration. Chris is also the co-host of The Record.\nHe lives on [Bainbridge Island][3], a quick ferry ride from Seattle.\n\n[1]: http://adobe.com/\n[2]: http://aged-and-distilled.com/napkin/\n[3]: http://www.ci.bainbridge-isl.wa.us/"]]] 57 | 58 | assert_equal exp, Feedtxt::JSON.parse( text ) 59 | assert_equal exp, Feedtxt.parse( text ) ## try shortcut alias too 60 | end 61 | 62 | 63 | end # class TestYaml 64 | -------------------------------------------------------------------------------- /feedtxt/test/test_scanner.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_scanner.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | class TestScanner < MiniTest::Test 11 | 12 | ## note: 13 | ## regex excape pipe: | to \| 14 | ## note: \\ needs to get escaped twice e.g. 
(\\ becomes \) 15 | FEED_BEGIN = %{^[ ]*\\|>>>+[ ]*$} 16 | FEED_END = %{^[ ]*<<<+\\|[ ]*$} 17 | 18 | def test_scan 19 | 20 | text =<>> 23 | title: "My Example Feed" 24 | home_page_url: "https://example.org/" 25 | feed_url: "https://example.org/feed.txt" 26 | 27 | id: "2" 28 | url: "https://example.org/second-item" 29 | --- 30 | This is a second item. 31 | 32 | id: "1" 33 | url: "https://example.org/initial-post" 34 | --- 35 | Hello, world! 36 | <<<| 37 | TXT 38 | 39 | s = StringScanner.new( text ) 40 | 41 | prolog = s.scan_until( /(?=#{FEED_BEGIN})/ ) 42 | pp prolog 43 | 44 | feed_begin = s.scan( /#{FEED_BEGIN}/ ) 45 | assert_equal '|>>>', feed_begin 46 | 47 | body = s.scan_until( /(?=#{FEED_END})/ ) 48 | pp body 49 | 50 | feed_end = s.scan( /#{FEED_END}/ ) 51 | assert_equal '<<<|', feed_end 52 | 53 | assert true 54 | end 55 | 56 | end # class TestScanner 57 | -------------------------------------------------------------------------------- /feedtxt/test/test_yaml.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_yaml.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | class TestYaml < MiniTest::Test 11 | 12 | def test_example 13 | 14 | text = read_text( 'spec/example.yaml' ) 15 | pp text 16 | 17 | exp = [ 18 | {"title"=>"My Example Feed", 19 | "home_page_url"=>"https://example.org/", 20 | "feed_url"=>"https://example.org/feed.txt"}, 21 | [[ 22 | {"id"=>"2", "url"=>"https://example.org/second-item"}, 23 | "This is a second item." 24 | ], 25 | [ 26 | {"id"=>"1", "url"=>"https://example.org/initial-post"}, 27 | "Hello, world!" 28 | ]]] 29 | 30 | assert_equal exp, Feedtxt::YAML.parse( text ) 31 | assert_equal exp, Feedtxt.parse( text ) ## try shortcut alias too 32 | end 33 | 34 | def test_podcast 35 | 36 | text = read_text( 'spec/podcast.yaml' ) 37 | pp text 38 | 39 | exp =[{"comment"=> 40 | "This is a podcast feed. 
You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json", 41 | "title"=>"The Record", 42 | "home_page_url"=>"http://therecord.co/", 43 | "feed_url"=>"http://therecord.co/feed.txt"}, 44 | [[{"id"=>"http://therecord.co/chris-parrish", 45 | "title"=>"Special #1 - Chris Parrish", 46 | "url"=>"http://therecord.co/chris-parrish", 47 | "summary"=> 48 | "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.", 49 | "published"=>DateTime.new( 2014, 5, 9, 23, 4, 0, '+02'), 50 | "attachments"=> 51 | [{"url"=> 52 | "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a", 53 | "mime_type"=>"audio/x-m4a", 54 | "size_in_bytes"=>89970236, 55 | "duration_in_seconds"=>6629}]}, 56 | "Chris has worked at [Adobe][1] and as a founder of Rogue Sheep, which won an Apple Design Award for Postage.\nChris's new company is Aged & Distilled with Guy English - which shipped [Napkin](2),\na Mac app for visual collaboration. Chris is also the co-host of The Record.\nHe lives on [Bainbridge Island][3], a quick ferry ride from Seattle.\n\n[1]: http://adobe.com/\n[2]: http://aged-and-distilled.com/napkin/\n[3]: http://www.ci.bainbridge-isl.wa.us/"]]] 57 | 58 | assert_equal exp, Feedtxt::YAML.parse( text ) 59 | assert_equal exp, Feedtxt.parse( text ) ## try shortcut alias too 60 | end 61 | 62 | 63 | end # class TestYaml 64 | -------------------------------------------------------------------------------- /hyperdata/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | /.config 4 | /coverage/ 5 | /InstalledFiles 6 | /pkg/ 7 | /spec/reports/ 8 | /spec/examples.txt 9 | /test/tmp/ 10 | /test/version_tmp/ 11 | /tmp/ 12 | 13 | # Used by dotenv library to load environment variables. 
14 | # .env 15 | 16 | ## Specific to RubyMotion: 17 | .dat* 18 | .repl_history 19 | build/ 20 | *.bridgesupport 21 | build-iPhoneOS/ 22 | build-iPhoneSimulator/ 23 | 24 | ## Specific to RubyMotion (use of CocoaPods): 25 | # 26 | # We recommend against adding the Pods directory to your .gitignore. However 27 | # you should judge for yourself, the pros and cons are mentioned at: 28 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 29 | # 30 | # vendor/Pods/ 31 | 32 | ## Documentation cache and generated files: 33 | /.yardoc/ 34 | /_yardoc/ 35 | /doc/ 36 | /rdoc/ 37 | 38 | ## Environment normalization: 39 | /.bundle/ 40 | /vendor/bundle 41 | /lib/bundler/man/ 42 | 43 | # for a library or gem, you might want to ignore these files since the code is 44 | # intended to run in multiple environments; otherwise, check them in: 45 | # Gemfile.lock 46 | # .ruby-version 47 | # .ruby-gemset 48 | 49 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 50 | .rvmrc 51 | -------------------------------------------------------------------------------- /hyperdata/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ### 0.0.1 / 2017-06-11 3 | 4 | * Everything is new. 
First release 5 | -------------------------------------------------------------------------------- /hyperdata/Manifest.txt: -------------------------------------------------------------------------------- 1 | CHANGELOG.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | lib/hyperdata.rb 6 | lib/hyperdata/builder/article.rb 7 | lib/hyperdata/feed.rb 8 | lib/hyperdata/item.rb 9 | lib/hyperdata/parser.rb 10 | lib/hyperdata/version.rb 11 | -------------------------------------------------------------------------------- /hyperdata/README.md: -------------------------------------------------------------------------------- 1 | # hyperdata gem - turn hypertext from web pages into structured data (supports Feed.HTML n friends) 2 | 3 | 4 | * home :: [github.com/feedhtml/hyperdata](https://github.com/feedhtml/hyperdata) 5 | * bugs :: [github.com/feedhtml/hyperdata/issues](https://github.com/feedhtml/hyperdata/issues) 6 | * gem :: [rubygems.org/gems/hyperdata](https://rubygems.org/gems/hyperdata) 7 | * rdoc :: [rubydoc.info/gems/hyperdata](http://rubydoc.info/gems/hyperdata) 8 | * forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake) 9 | 10 | 11 | ## What's Feed.HTML? - A Free Feeds Format in HyperText Markup Language (HTML) w/ Structured Meta Data 12 | 13 | What's Feed.HTML? Let's start with an example from the Microformats v2 `h-entry` spec: 14 | 15 | ``` html 16 |
17 |

Microformats are amazing

18 |

Published by W. Developer 19 | on 20 | 21 |

In which I extoll the virtues of using microformats.

22 | 23 |
24 |

Blah blah blah

25 |
26 |
27 | ``` 28 | 29 | Let's try to make it simpler and easier. Why in 2017 still (re)use `class` for microformats / microdata? 30 | Let's use `o` for object types / structs / scopes and `x` for (object) props / property keys: 31 | 32 | ``` html 33 |
34 |

Microformats are amazing

35 |

Published by W. Developer 36 | on 37 | 38 |

In which I extoll the virtues of using microformats.

39 | 40 |
41 |

Blah blah blah

42 |
43 |
44 | ``` 45 | 46 | Why `o` and `x`? and not let's say `p` and `q`? The idea is to use letters that are not already used in single-letter tags 47 | and that are easy to remember - think: tic-tac-toe-like ;-) 48 | 49 | 50 | Parsed to JSON resulting in: 51 | 52 | ``` json 53 | { 54 | "title": "Microformats are amazing", 55 | "author": "W. Developer", 56 | "card": { "name": "W. Developer", 57 | "url": "http://example.com" 58 | }, 59 | "published": "2013-06-13 12:00:00", 60 | "summary": "In which I extoll the virtues of using microformats.", 61 | "content": "

Blah blah blah

" 62 | } 63 | 64 | ``` 65 | 66 | ### Shortcuts / Alternatives 67 | 68 | #### Use hfeed / hitem / hcard 69 | 70 | As an alternative you can use `hfeed` or `feed` (for `o=feed`), `hitem` or `item` (for `o=item`), 71 | `hcard` or `card` (for `o=card`) shortcuts. Let's (re)try: 72 | 73 | ``` html 74 |
75 |

Microformats are amazing

76 |

Published by W. Developer 77 | on 78 | 79 |

In which I extoll the virtues of using microformats.

80 | 81 |
82 |

Blah blah blah

83 |
84 |
85 | ``` 86 | 87 | 88 | #### Use "predefined" convention over configuration structures 89 | 90 | As an alternative you can use the "recommended" predefined convention over configuration 91 | structure. Let's (re)try: 92 | 93 | ``` html 94 |
95 |

Microformats are amazing

96 |

Published by W. Developer 97 | on 98 | 99 |

In which I extoll the virtues of using microformats.

100 | 101 |
102 |

Blah blah blah

103 |
104 |
105 | ``` 106 | 107 | E.g.: 108 | 109 | - Use article for your item. 110 | - Use heading (h1) for your title. 111 | - The first paragraph (p) for your metadata block with author and published date. 112 | - The first time (time) is the published date. 113 | - The first anchor link (a) is the author. 114 | - Optional: The second paragraph (p) is the summary. 115 | - The first division (div) is the content. 116 | 117 | 118 | 119 | ## Usage 120 | 121 | To be done. 122 | 123 | 124 | ## License 125 | 126 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png) 127 | 128 | The Feed.HTML format & conventions 129 | and the `hyperdata` scripts are dedicated to the public domain. 130 | Use it as you please with no restrictions whatsoever. 131 | 132 | ## Questions? Comments? 133 | 134 | Send them along to the [wwwmake Forum/Mailing List](http://groups.google.com/group/wwwmake). 135 | Thanks! 136 | 137 | -------------------------------------------------------------------------------- /hyperdata/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/hyperdata/version.rb' 3 | 4 | Hoe.spec 'hyperdata' do 5 | 6 | self.version = Hyperdata::VERSION 7 | 8 | self.summary = "hyperdata - turn hypertext from web pages into structured data (supports Feed.HTML n friends)" 9 | self.description = summary 10 | 11 | self.urls = ['https://github.com/feedhtml/hyperdata'] 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'wwwmake@googlegroups.com' 15 | 16 | # switch extension to .markdown for gihub formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'HISTORY.md' 19 | 20 | self.licenses = ['Public Domain'] 21 | 22 | self.spec_extras = { 23 | required_ruby_version: '>= 1.9.2' 24 | } 25 | 26 | end 27 | -------------------------------------------------------------------------------- /hyperdata/lib/hyperdata.rb: -------------------------------------------------------------------------------- 1 | 
# encoding: utf-8 2 | 3 | 4 | # core and stdlibs 5 | 6 | require 'json' 7 | require 'date' 8 | require 'time' 9 | require 'pp' 10 | 11 | 12 | # 3rd party gems/libs 13 | require 'logutils' 14 | 15 | require 'nokogiri' 16 | 17 | 18 | # our own code 19 | require 'hyperdata/version' # let it always go first 20 | 21 | require 'hyperdata/feed' 22 | require 'hyperdata/item' 23 | 24 | require 'hyperdata/builder/article' 25 | require 'hyperdata/parser' 26 | 27 | 28 | 29 | # say hello 30 | puts Hyperdata.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG) 31 | -------------------------------------------------------------------------------- /hyperdata/lib/hyperdata/builder/article.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Hyperdata 4 | 5 | class ArticleFeedBuilder 6 | 7 | include LogUtils::Logging 8 | 9 | 10 | def self.build( doc ) 11 | feed = self.new( doc ) 12 | feed.to_feed 13 | end 14 | 15 | def initialize( doc ) 16 | @feed = build_feed( doc ) 17 | end 18 | 19 | def to_feed() @feed; end 20 | 21 | 22 | 23 | 24 | def build_feed( doc ) 25 | feed = Feed.new 26 | 27 | ## todo: find title from page_url 28 | 29 | articles = doc.css( 'article' ) 30 | pp articles.size 31 | pp articles 32 | 33 | articles.each do |article| 34 | feed.items << build_item( article ) 35 | end 36 | 37 | feed # return new feed 38 | end # method build_feed 39 | 40 | 41 | 42 | 43 | def build_item( ht ) 44 | item = Item.new # Item.new 45 | 46 | ## check for h1 47 | 48 | headings = ht.css( 'h1' ) 49 | if headings.any? 
50 | item.title = headings[0].text 51 | end 52 | 53 | paras = ht.css( 'p' ) 54 | if paras[1] ## quick hack: for now assume 2nd para is summary if present 55 | item.summary = paras[1].text 56 | end 57 | 58 | item 59 | end # method build_item 60 | 61 | end # ArticleFeedBuilder 62 | end # Hyperdata 63 | -------------------------------------------------------------------------------- /hyperdata/lib/hyperdata/feed.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Hyperdata 4 | 5 | class Feed 6 | 7 | attr_accessor :title 8 | attr_accessor :url ## todo - add alias site_url/home_page_url/page_url - why? why not?? 9 | attr_accessor :feed_url 10 | 11 | attr_accessor :items 12 | 13 | def initialize 14 | ## note: make items empty arrays on startup (e.g. not nil) 15 | @items = [] 16 | end 17 | 18 | end # class Feed 19 | 20 | end # module Hyperdata 21 | -------------------------------------------------------------------------------- /hyperdata/lib/hyperdata/item.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Hyperdata 4 | 5 | class Item 6 | 7 | attr_accessor :title 8 | attr_accessor :url 9 | 10 | 11 | ## note: only content/content_html should use html; 12 | ## all others (e.g. title/summary/content_text) shoud be plain (vanilla) text 13 | 14 | 15 | def content?() @content.nil? == false; end 16 | attr_accessor :content 17 | 18 | ## note: content_html is an alias for content 19 | ## will hold type html/xhtml/html-escaped - check if always converted to string by parser ?? 20 | alias :content_html :content 21 | alias :content_html= :content= 22 | alias :content_html? :content? 23 | 24 | 25 | def content_text?() @content_text.nil? == false; end 26 | attr_accessor :content_text 27 | 28 | 29 | 30 | def summary?() @summary.nil? == false; end 31 | attr_accessor :summary 32 | 33 | 34 | def updated?() @updated.nil? 
== false; end 35 | attr_accessor :updated 36 | attr_accessor :updated_local # "unparsed" local datetime as in feed (NOT converted to utc) 37 | 38 | def published?() @published.nil? == false; end 39 | attr_accessor :published # note: published is basically an alias for created 40 | attr_accessor :published_local # "unparsed" local datetime as in feed (NOT converted to utc) 41 | 42 | attr_accessor :id 43 | 44 | end # class Item 45 | 46 | end # module Hyperdata 47 | -------------------------------------------------------------------------------- /hyperdata/lib/hyperdata/parser.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | module Hyperdata 5 | 6 | 7 | class Parser 8 | 9 | include LogUtils::Logging 10 | 11 | 12 | ### convenience class/factory method 13 | def self.parse( text, opts={} ) 14 | self.new( text ).parse 15 | end 16 | 17 | ### Note: lets keep/use same API as RSS::Parser for now 18 | def initialize( text ) 19 | @text = text 20 | end 21 | 22 | 23 | 24 | def parse 25 | @doc = Nokogiri::HTML( @text ) 26 | 27 | @feed = ArticleFeedBuilder.build( @doc ) 28 | @feed # return feed for now (use a (Hyper)FeedParser instead of "generic" Parser - why? why not?) 
29 | end # method parse 30 | 31 | 32 | end # class Parser 33 | end # module Hyperdata 34 | -------------------------------------------------------------------------------- /hyperdata/lib/hyperdata/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Hyperdata 4 | 5 | MAJOR = 0 6 | MINOR = 0 7 | PATCH = 1 8 | VERSION = [MAJOR,MINOR,PATCH].join('.') 9 | 10 | def self.version 11 | VERSION 12 | end 13 | 14 | def self.banner 15 | "hyperdata/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]" 16 | end 17 | 18 | def self.root 19 | "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}" 20 | end 21 | 22 | end # module Hyperdata 23 | -------------------------------------------------------------------------------- /hyperdata/sandbox/dumps/article.html.txt: -------------------------------------------------------------------------------- 1 | #(Document:0x20537e8 { 2 | name = "document", 3 | children = [ 4 | #(DTD:0x2053314 { name = "html" }), 5 | #(Element:0x2052f3c { 6 | name = "html", 7 | children = [ 8 | #(Element:0x2052a20 { 9 | name = "body", 10 | children = [ 11 | #(Element:0x2052534 { 12 | name = "article", 13 | children = [ 14 | #(Text "\n "), 15 | #(Element:0x2057dfc { 16 | name = "h1", 17 | children = [ #(Text "Microformats are amazing")] 18 | }), 19 | #(Text "\n "), 20 | #(Element:0x205734c { 21 | name = "p", 22 | children = [ 23 | #(Text "Published by "), 24 | #(Element:0x2056cd4 { 25 | name = "a", 26 | attributes = [ 27 | #(Attr:0x2056b48 { 28 | name = "href", 29 | value = "http://example.com" 30 | })], 31 | children = [ #(Text "W. 
Developer")] 32 | }), 33 | #(Text "\n on "), 34 | #(Element:0x205bc60 { 35 | name = "time", 36 | attributes = [ 37 | #(Attr:0x205bab0 { 38 | name = "datetime", 39 | value = "2013-06-13 12:00:00" 40 | })], 41 | children = [ 42 | #(Text "13"), 43 | #(Element:0x205af70 { 44 | name = "sup", 45 | children = [ #(Text "th")] 46 | }), 47 | #(Text " June 2013")] 48 | }), 49 | #(Text "\n\n ")] 50 | }), 51 | #(Element:0x205a2f8 { 52 | name = "p", 53 | children = [ 54 | #(Text "In which I extoll the virtues of using microformats.")] 55 | }), 56 | #(Text "\n\n "), 57 | #(Element:0x205f998 { 58 | name = "div", 59 | children = [ 60 | #(Text "\n "), 61 | #(Element:0x205f320 { 62 | name = "p", 63 | children = [ #(Text "Blah blah blah")] 64 | }), 65 | #(Text "\n ")] 66 | }), 67 | #(Text "\n")] 68 | }), 69 | #(Text "\n")] 70 | })] 71 | })] 72 | }) 73 | -------------------------------------------------------------------------------- /hyperdata/sandbox/dumps/o-item.html.txt: -------------------------------------------------------------------------------- 1 | #(Document:0x210b7c0 { 2 | name = "document", 3 | children = [ 4 | #(DTD:0x210b2ec { name = "html" }), 5 | #(Element:0x210af14 { 6 | name = "html", 7 | children = [ 8 | #(Element:0x210a9f8 { 9 | name = "body", 10 | children = [ 11 | #(Element:0x210a50c { 12 | name = "article", 13 | attributes = [ #(Attr:0x210a26c { name = "o", value = "item" })], 14 | children = [ 15 | #(Text "\n "), 16 | #(Element:0x210f72c { 17 | name = "h1", 18 | attributes = [ 19 | #(Attr:0x210f5a0 { name = "x", value = "title" })], 20 | children = [ #(Text "Microformats are amazing")] 21 | }), 22 | #(Text "\n "), 23 | #(Element:0x210e730 { 24 | name = "p", 25 | children = [ 26 | #(Text "Published by "), 27 | #(Element:0x210e0b8 { 28 | name = "a", 29 | attributes = [ 30 | #(Attr:0x2113f14 { name = "o", value = "card" }), 31 | #(Attr:0x2113f08 { name = "x", value = "author" }), 32 | #(Attr:0x2113efc { 33 | name = "href", 34 | value = "http://example.com" 35 | 
})], 36 | children = [ #(Text "W. Developer")] 37 | }), 38 | #(Text "\n on "), 39 | #(Element:0x211269c { 40 | name = "time", 41 | attributes = [ 42 | #(Attr:0x21124ec { name = "x", value = "published" }), 43 | #(Attr:0x21124e0 { 44 | name = "datetime", 45 | value = "2013-06-13 12:00:00" 46 | })], 47 | children = [ 48 | #(Text "13"), 49 | #(Element:0x21174c0 { 50 | name = "sup", 51 | children = [ #(Text "th")] 52 | }), 53 | #(Text " June 2013")] 54 | }), 55 | #(Text "\n\n ")] 56 | }), 57 | #(Element:0x2116848 { 58 | name = "p", 59 | attributes = [ 60 | #(Attr:0x2116698 { name = "x", value = "summary" })], 61 | children = [ 62 | #(Text "In which I extoll the virtues of using microformats.")] 63 | }), 64 | #(Text "\n \n "), 65 | #(Element:0x211b8d0 { 66 | name = "div", 67 | attributes = [ 68 | #(Attr:0x211b720 { name = "x", value = "content" })], 69 | children = [ 70 | #(Text "\n "), 71 | #(Element:0x211ac40 { 72 | name = "p", 73 | children = [ #(Text "Blah blah blah")] 74 | }), 75 | #(Text "\n ")] 76 | }), 77 | #(Text "\n")] 78 | }), 79 | #(Text "\n")] 80 | })] 81 | })] 82 | }) 83 | 84 | -------------------------------------------------------------------------------- /hyperdata/test/feeds/spec/article.html: -------------------------------------------------------------------------------- 1 |
2 |

Microformats are amazing

3 |

Published by W. Developer 4 | on 5 | 6 |

In which I extoll the virtues of using microformats.

7 | 8 |
9 |

Blah blah blah

10 |
11 |
12 | -------------------------------------------------------------------------------- /hyperdata/test/feeds/spec/o/item.html: -------------------------------------------------------------------------------- 1 |
2 |

Microformats are amazing

3 |

Published by W. Developer 4 | on 5 | 6 |

In which I extoll the virtues of using microformats.

7 | 8 |
9 |

Blah blah blah

10 |
11 |
# ------------------------------------------------------------------------------
# /hyperdata/test/helper.rb
# ------------------------------------------------------------------------------
## $:.unshift(File.dirname(__FILE__))


## minitest setup

require 'minitest/autorun'

require 'logutils'
require 'textutils'


## our own code
require 'hyperdata'



LogUtils::Logger.root.level = :debug


## Read an HTML test fixture and return its contents as a string.
##
##   name - fixture path relative to <Hyperdata.root>/test/feeds, without the
##          .html extension (e.g. 'spec/article')
##
## File.read raises Errno::ENOENT if the fixture does not exist.
def read_text( name )
  File.read( "#{Hyperdata.root}/test/feeds/#{name}.html" )
end


# ------------------------------------------------------------------------------
# /hyperdata/test/test_article.rb
# ------------------------------------------------------------------------------
###
#  to run use
#     ruby -I ./lib -I ./test test/test_article.rb
#  or better
#     rake test

require 'helper'


## Smoke test: parses the spec/article fixture and pretty-prints the result.
## It only checks that parsing does not raise; the parsed structure itself
## is not asserted on.
##
## Note: inherits from Minitest::Test (canonical name); the older MiniTest
## alias is only defined by recent minitest releases when MT_COMPAT is set.
class TestArticle < Minitest::Test

  def test_article
    text = read_text( 'spec/article' )
    ## text = read_text( 'spec/o/item' )
    feed = Hyperdata::Parser.parse( text )
    pp feed

    assert true
  end

end # class TestArticle


# ------------------------------------------------------------------------------
# /hyperdata/test/test_version.rb
# ------------------------------------------------------------------------------
###
#  to run use
#     ruby -I ./lib -I ./test test/test_version.rb
#  or better
#     rake test

require 'helper'


## Smoke test: prints Hyperdata::VERSION so a missing/unloadable constant
## surfaces as a test error.
##
## Note: inherits from Minitest::Test (canonical name); the older MiniTest
## alias is only defined by recent minitest releases when MT_COMPAT is set.
class TestVersion < Minitest::Test

  def test_version

    puts "Hyperdata: #{Hyperdata::VERSION}"

    assert true
  end

end # class TestVersion