├── LICENSE
├── README.md
├── subreddit-top
└── the-daily-show-between-the-scenes

/LICENSE:
--------------------------------------------------------------------------------
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <https://unlicense.org>
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Collection of small scripts to generate feeds

These are mostly ready to run. Set the three `FEED_*` environment variables described below, and tweak the sections marked `########## EDIT ##########` in each script if you want a different feed title or source.

### subreddit-top
Generates a feed of the top posts of chosen subreddits.

### the-daily-show-between-the-scenes
Generates a feed for [The Daily Show](https://www.youtube.com/channel/UCwWhs_6x42TyRM4Wstoq8HA/)’s “Between the Scenes” videos.
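
Both scripts check the same three environment variables on startup and abort with a reminder if any is missing. A setup along these lines should work; the values are illustrative, point them at your own directories and web host:

```sh
# Illustrative values; adjust to your own machine and web host
export FEED_LOCAL_DIR="$HOME/feeds"                   # directory the JSON feed files are written to
export FEED_WEB_DIR="https://example.com/feeds"       # public URL of that directory
export FEED_SETTINGS_DIR="$HOME/.config/feed-scripts" # config files such as subreddits.txt
```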

#### License
The Unlicense (Public Domain, essentially)
--------------------------------------------------------------------------------
/subreddit-top:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'cgi'
require 'json'
require 'net/http'
require 'uri'
require 'pathname'

########## EDIT ##########
# Configure feed info
Site_url = URI.parse('https://reddit.com/')
Feed_title = 'Subreddit top post amalgamation'

########## LEAVE ##########
# Check for common required environment variables
begin
  Feed_local_dir = Pathname.new(ENV['FEED_LOCAL_DIR'])
  Feed_web_dir = Pathname.new(ENV['FEED_WEB_DIR'])
  Feed_settings_dir = Pathname.new(ENV['FEED_SETTINGS_DIR'])

  raise Errno::ENOENT unless Feed_local_dir.directory? && Feed_web_dir && Feed_settings_dir.directory?
rescue TypeError, Errno::ENOENT
  abort <<~ERROR
    Some required environment variables are not set correctly.
    Full list to verify:

    FEED_LOCAL_DIR: path to the directory where the feed is read and written
    FEED_WEB_DIR: URL of the web directory hosting the feed
    FEED_SETTINGS_DIR: path to the directory containing feed script config files
  ERROR
end

# Prepare the feed
Feed_slug = Feed_title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
Feed_file = Feed_local_dir.join("#{Feed_slug}.json")
Feed_location = Feed_web_dir.join(Feed_file.basename)

Previous_items = begin
  JSON.parse(Feed_file.read)['items']
rescue Errno::ENOENT
  [{}]
end

Previous_newest_id = Previous_items.first['id']

########## EDIT ##########
# Strategy for getting new items
Image_extensions = ['.gif', '.jpg', '.png'].freeze
User_agent = 'Listing top posts on a private RSS feed on linux:subreddittop:0.0.1 (no account)'

Config_file = Feed_settings_dir.join('subreddits.txt')
abort "Missing #{Config_file}" unless Config_file.file?

# Config file should be of the form (one entry per line):
#   subreddit_name | check_period
# Example:
#   wallpapers | day
#   surrealism | month
# check_period should be one of: day, week, month, year, all (defaults to week)

Subreddits = lambda {
  check_periods = %w[day week month year all].freeze
  default_period = 'week'

  lines = Config_file.readlines(chomp: true)

  lines.each_with_object({}) { |line, hash|
    subreddit, check_in = line.split('|').map(&:strip)
    period = check_periods.include?(check_in) ? check_in : default_period

    hash[subreddit] = period
  }
}.call
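
# With the example config above, Subreddits would come out as:
#   { 'wallpapers' => 'day', 'surrealism' => 'month' }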

fetch_reddit_json = lambda { |user_agent, url|
  begin
    tries ||= 3

    response = Net::HTTP.get_response(url, 'User-Agent' => user_agent)

    case response
    when Net::HTTPSuccess
      JSON.parse(response.body)
    when Net::HTTPRedirection
      location = URI.parse(response['location'])
      fetch_reddit_json.call(location)
    else
      abort "Unexpected response (#{response.code}) for #{url}"
    end
  rescue Net::OpenTimeout
    unless (tries -= 1).zero?
      puts 'Encountered a timeout. Retrying in a few seconds.'
      sleep 10
      retry
    end

    abort 'Failed to get JSON.'
  end
}.curry.call(User_agent)

New_items = Subreddits.each_with_object([]) { |(subreddit, period), array|
  warn "Checking #{subreddit}…"
  sleep 3 # Avoid rate limiting

  sub_url = URI.parse("https://www.reddit.com/r/#{subreddit}/top.json?t=#{period}&limit=1")

  top_item = fetch_reddit_json.call(sub_url)['data']['children'].first['data']
  item_url = URI.join(Site_url, top_item['permalink'].split('/').map { |a| CGI.escape(a) }.join('/'))
  item_title = top_item['title']

  content = lambda {
    top_post_json_url = URI.parse("#{item_url}.json")
    data = fetch_reddit_json.call(top_post_json_url).first['data']['children'].first['data']

    html = []
    html.push("<p><small>From <a href=\"#{item_url}\">r/#{subreddit}</a></small></p>")

    # If post is just text, return it now (Reddit serves selftext_html escaped)
    text = data['selftext_html']
    html.push(CGI.unescapeHTML(text)) unless text.nil?

    return html if data['url'] == item_url.to_s

    # If post is video, embed it
    video_preview = data.dig('secure_media', 'reddit_video', 'fallback_url') ||
                    data.dig('preview', 'reddit_video_preview', 'fallback_url')

    if video_preview
      html.push("<video controls src=\"#{video_preview}\"></video>")
      return html
    end

    # If post is image, embed it
    if Image_extensions.any? { |ext| Pathname.new(data['url']).extname == ext }
      html.push("<img src=\"#{data['url']}\">")
      return html
    end

    # If post is gallery, embed all images
    if data['is_gallery']
      data['gallery_data']['items'].map { |item| item['media_id'] }.each do |id|
        item = data['media_metadata'].find { |metadata| metadata[1]['id'] == id }[1]
        url = item['s']['u'].gsub('&amp;', '&')

        html.push("<img src=\"#{url}\">")
      end
    end

    # If post is a URL, link to it
    html.push("<p><a href=\"#{data['url']}\">#{data['url']}</a></p>")

    html
  }.call.join

  array.push(title: item_title, id: item_url, url: "#{item_url}.compact", content_html: content)
}

########## LEAVE ##########
# If there are no new items, inform and exit
if New_items.empty?
  puts 'No new items to add'
  exit 0
end

# Rebuild feed
Feed = {
  version: 'https://jsonfeed.org/version/1',
  title: Feed_title,
  home_page_url: Site_url.to_s,
  feed_url: Feed_location.to_path,

  # Prepend new items to the old and limit the amount of items
  items: (New_items + Previous_items).first(100)
}

Feed_file.write(JSON.pretty_generate(Feed))
--------------------------------------------------------------------------------
/the-daily-show-between-the-scenes:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'json'
require 'net/http'
require 'nokogiri'
require 'pathname'

########## EDIT ##########
# Configure feed info
Site_url = URI.parse('https://www.youtube.com/feeds/videos.xml?channel_id=UCwWhs_6x42TyRM4Wstoq8HA')
Feed_title = 'The Daily Show Between the Scenes'

########## LEAVE ##########
# Check for common required environment variables
begin
  Feed_local_dir = Pathname.new(ENV['FEED_LOCAL_DIR'])
  Feed_web_dir = Pathname.new(ENV['FEED_WEB_DIR'])
  Feed_settings_dir = Pathname.new(ENV['FEED_SETTINGS_DIR'])

  raise Errno::ENOENT unless Feed_local_dir.directory? && Feed_web_dir && Feed_settings_dir.directory?
rescue TypeError, Errno::ENOENT
  abort <<~ERROR
    Some required environment variables are not set correctly.
    Full list to verify:

    FEED_LOCAL_DIR: path to the directory where the feed is read and written
    FEED_WEB_DIR: URL of the web directory hosting the feed
    FEED_SETTINGS_DIR: path to the directory containing feed script config files
  ERROR
end

# Prepare the feed
Feed_slug = Feed_title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
Feed_file = Feed_local_dir.join("#{Feed_slug}.json")
Feed_location = Feed_web_dir.join(Feed_file.basename)

Previous_items = begin
  JSON.parse(Feed_file.read)['items']
rescue Errno::ENOENT
  [{}]
end

Previous_newest_id = Previous_items.first['id']

########## EDIT ##########
# Strategy for getting new items
Latest_entries = Nokogiri::XML(Net::HTTP.get(Site_url))
                 .remove_namespaces!
                 .css('entry')
                 .select { |e| e.at('title').text.match?('Between the Scenes') }
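
# After remove_namespaces!, each entry in YouTube's Atom feed looks roughly
# like this (values illustrative; the media: prefixes are already stripped):
#
#   <entry>
#     <title>Between the Scenes - …</title>
#     <link rel="alternate" href="https://www.youtube.com/watch?v=…"/>
#     <group>
#       <thumbnail url="https://i.ytimg.com/vi/…/hqdefault.jpg"/>
#       <description>…</description>
#     </group>
#   </entry>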

New_items = Latest_entries.each_with_object([]) do |entry, array|
  entry_url = entry.at('link').attr('href')

  # Stop adding once we reach the newest item already in the feed
  # (break array so New_items keeps what was collected so far)
  break array if entry_url == Previous_newest_id

  entry_title = entry.at('title').text
  entry_thumbnail = entry.at('thumbnail').attr('url')
  entry_description = entry.at('description').text.sub(/\n.*/m, '')
  item_content = "<p><img src=\"#{entry_thumbnail}\"></p><p>#{entry_description}</p>"

  array.push(title: entry_title, id: entry_url, url: entry_url, content_html: item_content)
end

########## LEAVE ##########
# If there are no new items, inform and exit
if New_items.empty?
  puts 'No new items to add'
  exit 0
end

# Rebuild feed
Feed = {
  version: 'https://jsonfeed.org/version/1',
  title: Feed_title,
  home_page_url: Site_url.to_s,
  feed_url: Feed_location.to_path,

  # Prepend new items to the old and limit the amount of items
  items: (New_items + Previous_items).first(100)
}

Feed_file.write(JSON.pretty_generate(Feed))
--------------------------------------------------------------------------------