├── LICENSE
├── README.md
├── subreddit-top
└── the-daily-show-between-the-scenes

/LICENSE:
--------------------------------------------------------------------------------
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <https://unlicense.org>
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Collection of small scripts to generate feeds

These are mostly ready to run. Set the three `FEED_*` environment variables described below, and tweak the sections marked `########## EDIT ##########` in each script if you want a different feed title or source.

### subreddit-top
Generates a feed of the top posts of chosen subreddits.

### the-daily-show-between-the-scenes
Generates a feed for [The Daily Show](https://www.youtube.com/channel/UCwWhs_6x42TyRM4Wstoq8HA/)’s “Between the Scenes” videos.
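
Both scripts check the same three environment variables on startup and abort with a reminder if any is missing. A setup along these lines should work; the values are illustrative, point them at your own directories and web host:

```sh
# Illustrative values; adjust to your own machine and web host
export FEED_LOCAL_DIR="$HOME/feeds"                   # directory the JSON feed files are written to
export FEED_WEB_DIR="https://example.com/feeds"       # public URL of that directory
export FEED_SETTINGS_DIR="$HOME/.config/feed-scripts" # config files such as subreddits.txt
```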

#### License
The Unlicense (Public Domain, essentially)
--------------------------------------------------------------------------------
/subreddit-top:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'cgi'
require 'json'
require 'net/http'
require 'uri'
require 'pathname'

########## EDIT ##########
# Configure feed info
Site_url = URI.parse('https://reddit.com/')
Feed_title = 'Subreddit top post amalgamation'

########## LEAVE ##########
# Check for common required environment variables
begin
  Feed_local_dir = Pathname.new(ENV['FEED_LOCAL_DIR'])
  Feed_web_dir = Pathname.new(ENV['FEED_WEB_DIR'])
  Feed_settings_dir = Pathname.new(ENV['FEED_SETTINGS_DIR'])

  raise Errno::ENOENT unless Feed_local_dir.directory? && Feed_web_dir && Feed_settings_dir.directory?
rescue TypeError, Errno::ENOENT
  abort <<~ERROR
    Some required environment variables are not set correctly.
    Full list to verify:

    FEED_LOCAL_DIR: path to the directory where the feed is read and written
    FEED_WEB_DIR: URL of the web directory hosting the feed
    FEED_SETTINGS_DIR: path to the directory containing feed script config files
  ERROR
end

# Prepare the feed
Feed_slug = Feed_title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
Feed_file = Feed_local_dir.join("#{Feed_slug}.json")
Feed_location = Feed_web_dir.join(Feed_file.basename)

Previous_items = begin
  JSON.parse(Feed_file.read)['items']
rescue Errno::ENOENT
  [{}]
end

Previous_newest_id = Previous_items.first['id']

########## EDIT ##########
# Strategy for getting new items
Image_extensions = ['.gif', '.jpg', '.png'].freeze
User_agent = 'Listing top posts on a private RSS feed on linux:subreddittop:0.0.1 (no account)'

Config_file = Feed_settings_dir.join('subreddits.txt')
abort "Missing #{Config_file}" unless Config_file.file?

# Config file should be of the form (one entry per line):
#   subreddit_name | check_period
# Example:
#   wallpapers | day
#   surrealism | month
# check_period should be one of: day, week, month, year, all (defaults to week)

Subreddits = lambda {
  check_periods = %w[day week month year all].freeze
  default_period = 'week'

  lines = Config_file.readlines(chomp: true)

  lines.each_with_object({}) { |line, hash|
    subreddit, check_in = line.split('|').map(&:strip)
    period = check_periods.include?(check_in) ? check_in : default_period

    hash[subreddit] = period
  }
}.call
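
# With the example config above, Subreddits would come out as:
#   { 'wallpapers' => 'day', 'surrealism' => 'month' }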

fetch_reddit_json = lambda { |user_agent, url|
  begin
    tries ||= 3

    response = Net::HTTP.get_response(url, 'User-Agent' => user_agent)

    case response
    when Net::HTTPSuccess
      JSON.parse(response.body)
    when Net::HTTPRedirection
      location = URI.parse(response['location'])
      fetch_reddit_json.call(location)
    else
      abort "Unexpected response (#{response.code}) for #{url}"
    end
  rescue Net::OpenTimeout
    unless (tries -= 1).zero?
      puts 'Encountered a timeout. Retrying in a few seconds.'
      sleep 10
      retry
    end

    abort 'Failed to get JSON.'
  end
}.curry.call(User_agent)

New_items = Subreddits.each_with_object([]) { |(subreddit, period), array|
  warn "Checking #{subreddit}…"
  sleep 3 # Avoid rate limiting

  sub_url = URI.parse("https://www.reddit.com/r/#{subreddit}/top.json?t=#{period}&limit=1")

  top_item = fetch_reddit_json.call(sub_url)['data']['children'].first['data']
  item_url = URI.join(Site_url, top_item['permalink'].split('/').map { |a| CGI.escape(a) }.join('/'))
  item_title = top_item['title']

  content = lambda {
    top_post_json_url = URI.parse("#{item_url}.json")
    data = fetch_reddit_json.call(top_post_json_url).first['data']['children'].first['data']

    html = []
    html.push("<p><small>From <a href=\"#{item_url}\">r/#{subreddit}</a></small></p>")

    # If post is just text, return it now (Reddit serves selftext_html escaped)
    text = data['selftext_html']
    html.push(CGI.unescapeHTML(text)) unless text.nil?

    return html if data['url'] == item_url.to_s

    # If post is video, embed it
    video_preview = data.dig('secure_media', 'reddit_video', 'fallback_url') ||
                    data.dig('preview', 'reddit_video_preview', 'fallback_url')

    if video_preview
      html.push("<video controls src=\"#{video_preview}\"></video>")
      return html
    end

    # If post is image, embed it
    if Image_extensions.any? { |ext| Pathname.new(data['url']).extname == ext }
      html.push("<img src=\"#{data['url']}\">")
      return html
    end

    # If post is gallery, embed all images
    if data['is_gallery']
      data['gallery_data']['items'].map { |item| item['media_id'] }.each do |id|
        item = data['media_metadata'].find { |metadata| metadata[1]['id'] == id }[1]
        url = item['s']['u'].gsub('&amp;', '&')

        html.push("<img src=\"#{url}\">")
      end
    end

    # If post is a URL, link to it
    html.push("<p><a href=\"#{data['url']}\">#{data['url']}</a></p>")

    html
  }.call.join

  array.push(title: item_title, id: item_url, url: "#{item_url}.compact", content_html: content)
}

########## LEAVE ##########
# If there are no new items, inform and exit
if New_items.empty?
  puts 'No new items to add'
  exit 0
end

# Rebuild feed
Feed = {
  version: 'https://jsonfeed.org/version/1',
  title: Feed_title,
  home_page_url: Site_url.to_s,
  feed_url: Feed_location.to_path,

  # Prepend new items to the old and limit the amount of items
  items: (New_items + Previous_items).first(100)
}

Feed_file.write(JSON.pretty_generate(Feed))
--------------------------------------------------------------------------------
/the-daily-show-between-the-scenes:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'json'
require 'net/http'
require 'nokogiri'
require 'pathname'

########## EDIT ##########
# Configure feed info
Site_url = URI.parse('https://www.youtube.com/feeds/videos.xml?channel_id=UCwWhs_6x42TyRM4Wstoq8HA')
Feed_title = 'The Daily Show Between the Scenes'

########## LEAVE ##########
# Check for common required environment variables
begin
  Feed_local_dir = Pathname.new(ENV['FEED_LOCAL_DIR'])
  Feed_web_dir = Pathname.new(ENV['FEED_WEB_DIR'])
  Feed_settings_dir = Pathname.new(ENV['FEED_SETTINGS_DIR'])

  raise Errno::ENOENT unless Feed_local_dir.directory? && Feed_web_dir && Feed_settings_dir.directory?
rescue TypeError, Errno::ENOENT
  abort <<~ERROR
    Some required environment variables are not set correctly.
    Full list to verify:

    FEED_LOCAL_DIR: path to the directory where the feed is read and written
    FEED_WEB_DIR: URL of the web directory hosting the feed
    FEED_SETTINGS_DIR: path to the directory containing feed script config files
  ERROR
end

# Prepare the feed
Feed_slug = Feed_title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
Feed_file = Feed_local_dir.join("#{Feed_slug}.json")
Feed_location = Feed_web_dir.join(Feed_file.basename)

Previous_items = begin
  JSON.parse(Feed_file.read)['items']
rescue Errno::ENOENT
  [{}]
end

Previous_newest_id = Previous_items.first['id']

########## EDIT ##########
# Strategy for getting new items
Latest_entries = Nokogiri::XML(Net::HTTP.get(Site_url))
                 .remove_namespaces!
                 .css('entry')
                 .select { |e| e.at('title').text.match?('Between the Scenes') }
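
# After remove_namespaces!, each entry in YouTube's Atom feed looks roughly
# like this (values illustrative; the media: prefixes are already stripped):
#
#   <entry>
#     <title>Between the Scenes - …</title>
#     <link rel="alternate" href="https://www.youtube.com/watch?v=…"/>
#     <group>
#       <thumbnail url="https://i.ytimg.com/vi/…/hqdefault.jpg"/>
#       <description>…</description>
#     </group>
#   </entry>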

New_items = Latest_entries.each_with_object([]) do |entry, array|
  entry_url = entry.at('link').attr('href')

  # Stop adding once we reach the newest item already in the feed
  # (break array so New_items keeps what was collected so far)
  break array if entry_url == Previous_newest_id

  entry_title = entry.at('title').text
  entry_thumbnail = entry.at('thumbnail').attr('url')
  entry_description = entry.at('description').text.sub(/\n.*/m, '')
  item_content = "<p><img src=\"#{entry_thumbnail}\"></p><p>#{entry_description}</p>"

  array.push(title: entry_title, id: entry_url, url: entry_url, content_html: item_content)
end

########## LEAVE ##########
# If there are no new items, inform and exit
if New_items.empty?
  puts 'No new items to add'
  exit 0
end

# Rebuild feed
Feed = {
  version: 'https://jsonfeed.org/version/1',
  title: Feed_title,
  home_page_url: Site_url.to_s,
  feed_url: Feed_location.to_path,

  # Prepend new items to the old and limit the amount of items
  items: (New_items + Previous_items).first(100)
}

Feed_file.write(JSON.pretty_generate(Feed))
--------------------------------------------------------------------------------