├── site
└── .gitignore
├── .gitignore
├── Gemfile
├── netlify.toml
├── .env.sample
├── .github
├── ISSUE_TEMPLATE
│ └── feed-inclusion-request.md
└── workflows
│ └── upload_opml.yml
├── README.md
├── redo-opml.rb
├── template.erb.original
├── template.erb
├── crawl.rb
└── engblogs.opml
/site/.gitignore:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .env
3 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 |
3 | gem "feedjira"
4 | gem "thread"
5 | gem "dotenv"
6 |
--------------------------------------------------------------------------------
/netlify.toml:
--------------------------------------------------------------------------------
1 | [[redirects]]
2 | from = "/"
3 | to = "http://engblogs.s3.amazonaws.com/index.html"
4 | status = 200
5 |
--------------------------------------------------------------------------------
/.env.sample:
--------------------------------------------------------------------------------
1 | AWS_ACCESS_KEY_ID=XXXX
2 | AWS_SECRET_ACCESS_KEY=XXXX
3 | AWS_DEFAULT_REGION=eu-west-1
4 | S3_BUCKET_NAME=engblogs
5 | DYNAMODB_TABLE_NAME=engblogs
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feed-inclusion-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feed inclusion request
3 | about: Suggest a new feed
4 | title: Feed suggestion
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **What is the URL of your blog?**
11 | ...
12 |
13 | **What is the feed URL for your blog?**
14 | ...
15 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # engineering blogs newsfeed
2 |
3 | As deployed at https://engineeringblogs.xyz/
4 |
5 | Feel free to suggest more worthwhile engineering blogs by opening an issue. Not all will be accepted, but all submissions are appreciated nonetheless. Be sure to include the blog URL *and* the feed URL (important, as not all of you make this obvious on the page) :-)
6 |
--------------------------------------------------------------------------------
/.github/workflows/upload_opml.yml:
--------------------------------------------------------------------------------
1 | # Uploads engblogs.opml to the S3 bucket when master is pushed,
2 | # or when the workflow is started manually.
3 | name: upload_opml
4 |
5 | on:
6 |   push:
7 |     branches: [ master ]
8 |   # Deliberately no pull_request trigger: the upload below publishes the file
9 |   # with --acl public-read, so it must only run for content already on master.
10 |   workflow_dispatch:
11 |
12 | jobs:
13 |   build:
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |       - uses: zdurham/s3-upload-github-action@master
18 |         with:
19 |           args: --acl public-read
20 |         env:
21 |           FILE: ./engblogs.opml
22 |           AWS_REGION: 'eu-west-1'
23 |           S3_BUCKET: 'engblogs'
24 |           S3_KEY: 'engblogs.opml'
25 |           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
26 |           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
27 |
--------------------------------------------------------------------------------
/redo-opml.rb:
--------------------------------------------------------------------------------
1 | # Cleans up the OPML file and gets rid of any feeds that no
2 | # longer load or parse properly.
3 | #
4 | # ruby redo-opml.rb > new.opml
5 | # aws s3 cp new.opml s3://engblogs/ --acl public-read
6 | # ...
7 | # aws s3 cp engblogs.opml s3://engblogs/ --acl public-read
8 |
9 | require 'dotenv'
10 | Dotenv.load
11 | require 'open-uri'
12 | require 'feedjira'
13 | require 'thread/pool'
14 |
15 | semaphore = Mutex.new # held by the workers below while they write to stdout
16 |
17 | pool = Thread.pool(40) # thread pool the per-feed work is submitted to
18 |
19 | opml = URI.open("http://#{ENV['S3_BUCKET_NAME']}.s3.#{ENV['AWS_DEFAULT_REGION']}.amazonaws.com/#{ENV['S3_BUCKET_NAME']}.opml").read # current OPML, fetched from the bucket named in the environment
20 |
21 | feeds = opml.scan(//) # NOTE(review): the pattern is empty in this copy, so it matches at every position and yields only empty strings; confirm the intended outline-matching regex
22 |
23 | feeds.map! do |feed| # replace each matched string with a hash of its attribute values
24 | {
25 | title: feed[/title=(\"|\')(.*?)\1/i, 2], # capture 2 = text between the matching quote characters; nil when the attribute is absent
26 | xmlurl: feed[/xmlurl=(\"|\')(.*?)\1/i, 2], # feed URL fetched by the loop below
27 | htmlurl: feed[/htmlurl=(\"|\')(.*?)\1/i, 2], # matched case-insensitively, like the other attributes
28 | }
29 | end
30 |
31 | puts %{
32 |
33 |
34 | Engineering Blogs
35 |
36 |
37 | } # NOTE(review): header printed before any feed lines; only the title text is present in this literal, confirm the OPML markup was not lost
38 |
39 | feeds.each do |feed|
40 | pool.process do
41 |
42 | STDERR.puts "Doing #{feed[:title]}"
43 | begin
44 | xml = URI.open(feed[:xmlurl], "User-Agent" => "My RSS Reader").read
45 | rescue Net::OpenTimeout, OpenSSL::SSL::SSLError, SocketError, OpenURI::HTTPError, URI::InvalidURIError => e
46 | STDERR.puts " FAILURE #{e}"
47 | next
48 | end
49 |
50 | xml.sub!(/\<\?.*?\?\>/, '')
51 | begin
52 | pfeed = Feedjira.parse(xml)
53 | rescue Feedjira::NoParserAvailable
54 | STDERR.puts " FAILURE"
55 | next
56 | end
57 | entries = pfeed.entries.map do |entry|
58 | {
59 | published: entry.published,
60 | title: entry.title,
61 | url: entry.url
62 | }
63 | end
64 |
65 | entries = entries.select { |entry| (Time.now - entry[:published]) < (86400 * 7) }
66 | STDERR.puts " Fetched #{pfeed.entries.size} entries, #{entries.size} recent"
67 |
68 | opml_string = %{}
69 |
70 | semaphore.synchronize do
71 | puts opml_string
72 | end
73 | end
74 | end
75 |
76 | pool.shutdown
77 |
78 | puts %{
79 |
80 | }
81 |
--------------------------------------------------------------------------------
/template.erb.original:
--------------------------------------------------------------------------------
1 |
2 |
3 | Engineering Blogs
4 |
5 |
76 |
77 |
78 |
79 |
80 |
81 | engineering blogs
82 | from <%= source_count %> sources [OPML]
83 | last built at <%= Time.now %>
84 |
85 |
86 | <%
87 | old_date = ''
88 | items.each do |item|
89 | d = item[:published].strftime("%Y-%m-%d") %>
90 |
91 | <% if old_date != d %>
92 |
<%= d %>
93 | <% end %>
94 |
95 |
<%= item[:published].strftime("%H") %>h
96 |
100 |
101 | <%
102 | old_date = d
103 | end %>
104 |
105 |
106 |
107 | Idea inspired by the engineering-blogs list (although it turns out a lot of them are dead/invalid so I cleaned it up and am adding more choices of my own).
108 |
109 | GitHub repo.
110 |
111 |
112 |
113 |
114 |
115 |
--------------------------------------------------------------------------------
/template.erb:
--------------------------------------------------------------------------------
1 |
2 |
3 | Engineering Blogs
4 |
5 |
84 |
85 |
86 |
87 |
88 |
89 | <%= source_count %> engineering blogs OPML
90 | last built at <%= Time.now.getutc.strftime("%Y-%m-%d %H:%M UTC") %>
91 |
92 |
93 | <%
94 | old_date = ''
95 | items.each do |item|
96 | d = item[:published].strftime("%Y-%m-%d") %>
97 |
98 | <% if old_date != d %>
99 |
<%= d %>
100 | <% end %>
101 |
102 |
103 |
104 |
<%= item[:feed] %>
105 |
106 | <%
107 | old_date = d
108 | end %>
109 |
110 |
111 |
112 |
115 |
116 |
117 |
118 |
119 |
120 |
--------------------------------------------------------------------------------
/crawl.rb:
--------------------------------------------------------------------------------
1 | # Crawl feeds from OPML file and build a page
2 | # to display the resulting items on.
3 |
4 | require 'dotenv'
5 | Dotenv.load
6 | require 'aws-sdk-dynamodb'
7 | require 'feedjira'
8 | require 'thread/pool'
9 | require 'digest/sha1'
10 | require 'open-uri'
11 |
12 |
13 | # -------------------
14 | # STEP ONE: READ AND PARSE OPML FILE
15 | # -------------------
16 |
17 | opml = URI.open("http://#{ENV['S3_BUCKET_NAME']}.s3.#{ENV['AWS_DEFAULT_REGION']}.amazonaws.com/#{ENV['S3_BUCKET_NAME']}.opml").read # OPML fetched from the bucket named in the environment
18 |
19 | # Can't be bothered with the dependencies, so let's go oldschool..
20 | feeds = opml.scan(//) # NOTE(review): the pattern is empty in this copy, so it matches at every position and yields only empty strings; confirm the intended outline-matching regex
21 | feeds.map! do |feed| # replace each matched string with a hash of its attribute values
22 | {
23 | title: feed[/title=(\"|\')(.*?)\1/i, 2], # capture 2 = text between the matching quote characters; nil when the attribute is absent
24 | xmlurl: feed[/xmlurl=(\"|\')(.*?)\1/i, 2], # feed URL fetched in step two
25 | htmlurl: feed[/htmlurl=(\"|\')(.*?)\1/i, 2], # stored with each item as :feed_site in step two
26 | }
27 | end
28 |
29 |
30 | # -------------------
31 | # STEP TWO: CRAWL ALL THE FEEDS AND GET THE ITEMS
32 | # -------------------
33 |
34 | pool = Thread.pool(20)
35 | semaphore = Mutex.new
36 | dynamodb = Aws::DynamoDB::Client.new
37 |
38 | feeds.each do |feed|
39 | pool.process do
40 | STDERR.puts "Doing #{feed[:title]}"
41 |
42 | # Fetch the feed. If we can't, fail.
43 | begin
44 | xml = URI.open(feed[:xmlurl], :open_timeout => 5, :read_timeout => 10, "User-Agent" => "My RSS Reader" ).read
45 | rescue Net::OpenTimeout, OpenSSL::SSL::SSLError, SocketError, OpenURI::HTTPError, URI::InvalidURIError => e
46 | STDERR.puts " FAILURE #{e}"
47 | next
48 | end
49 |
50 | # Parse the feed. If we can't, fail.
51 | begin
52 | pfeed = Feedjira.parse(xml)
53 | rescue Feedjira::NoParserAvailable
54 | STDERR.puts " FAILURE"
55 | next
56 | end
57 |
58 | entries = pfeed.entries.map do |entry|
59 | {
60 | published: entry.published,
61 | title: entry.title.to_s.strip,
62 | url: entry.url.to_s.strip,
63 | feed: feed[:title],
64 | feed_site: feed[:htmlurl]
65 | }
66 | end
67 |
68 | # We only want items that are less than 8 days old
69 | entries = entries.select { |entry| (Time.now - entry[:published]) < (86400 * 8) }
70 |
71 | STDERR.puts " Fetched #{pfeed.entries.size} entries, #{entries.size} recent"
72 |
73 | # Put items into DynamoDB
74 | semaphore.synchronize do
75 | entries.each do |entry|
76 | t = entry[:published]
77 | params = {
78 | table_name: ENV['DYNAMODB_TABLE_NAME'],
79 | item: {
80 | date: t.strftime("%Y-%m-%d"),
81 | hash: Digest::SHA1.hexdigest(entry[:url]),
82 | ttl: t.to_i + (86400 * 7) + 3600
83 | }
84 | }
85 | params[:item].merge!(entry)
86 | params[:item][:published] = params[:item][:published].to_s
87 |
88 | begin
89 | dynamodb.put_item(params)
90 | STDERR.puts "Added"
91 | rescue Aws::DynamoDB::Errors::ServiceError => error
92 | STDERR.puts "ERROR"
93 | STDERR.puts error.message
94 | end
95 | end
96 | end
97 | end
98 | end
99 |
100 | pool.shutdown
101 |
102 | STDERR.puts "Finished"
103 |
--------------------------------------------------------------------------------
/engblogs.opml:
--------------------------------------------------------------------------------
1 |
2 |
3 | Engineering Blogs
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
--------------------------------------------------------------------------------