├── site └── .gitignore ├── .gitignore ├── Gemfile ├── netlify.toml ├── .env.sample ├── .github ├── ISSUE_TEMPLATE │ └── feed-inclusion-request.md └── workflows │ └── upload_opml.yml ├── README.md ├── redo-opml.rb ├── template.erb.original ├── template.erb ├── crawl.rb └── engblogs.opml /site/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env 3 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem "feedjira" 4 | gem "thread" 5 | gem "dotenv" 6 | -------------------------------------------------------------------------------- /netlify.toml: -------------------------------------------------------------------------------- 1 | [[redirects]] 2 | from = "/" 3 | to = "http://engblogs.s3.amazonaws.com/index.html" 4 | status = 200 5 | -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | AWS_ACCESS_KEY_ID=XXXX 2 | AWS_SECRET_ACCESS_KEY=XXXX 3 | AWS_DEFAULT_REGION=eu-west-1 4 | S3_BUCKET_NAME=engblogs 5 | DYNAMODB_TABLE_NAME=engblogs -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feed-inclusion-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feed inclusion request 3 | about: Suggest a new feed 4 | title: Feed suggestion 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What is the URL of your blog?** 11 | ... 12 | 13 | **What is the feed URL for your blog?** 14 | ... 
15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # engineering blogs newsfeed 2 | 3 | As deployed at https://engineeringblogs.xyz/ 4 | 5 | Feel free to suggest more worthwhile engineering blogs by opening an issue. Not all will be accepted, but all submissions are appreciated nonetheless. Be sure to include the blog URL *and* feed URL (important as not all of you make this obvious on the page) :-) 6 | -------------------------------------------------------------------------------- /.github/workflows/upload_opml.yml: -------------------------------------------------------------------------------- 1 | name: upload_opml 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: zdurham/s3-upload-github-action@master 16 | with: 17 | args: --acl public-read 18 | env: 19 | FILE: ./engblogs.opml 20 | AWS_REGION: 'eu-west-1' 21 | S3_BUCKET: 'engblogs' 22 | S3_KEY: 'engblogs.opml' 23 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 24 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 25 | -------------------------------------------------------------------------------- /redo-opml.rb: -------------------------------------------------------------------------------- 1 | # Cleans up the OPML file and gets rid of any feeds that no 2 | # longer load or parse properly. 3 | # 4 | # ruby redo-opml.rb > new.opml 5 | # aws s3 cp new.opml s3://engblogs/ --acl public-read 6 | # ... 
7 | # aws s3 cp engblogs.opml s3://engblogs/ --acl public-read 8 | 9 | require 'dotenv' 10 | Dotenv.load 11 | require 'open-uri' 12 | require 'feedjira' 13 | require 'thread/pool' 14 | 15 | semaphore = Mutex.new 16 | 17 | pool = Thread.pool(40) 18 | 19 | opml = URI.open("http://#{ENV['S3_BUCKET_NAME']}.s3.#{ENV['AWS_DEFAULT_REGION']}.amazonaws.com/#{ENV['S3_BUCKET_NAME']}.opml").read 20 | 21 | feeds = opml.scan(//) 22 | 23 | feeds.map! do |feed| 24 | { 25 | title: feed[/title=(\"|\')(.*?)\1/i, 2], 26 | xmlurl: feed[/xmlurl=(\"|\')(.*?)\1/i, 2], 27 | htmlurl: feed[/htmlurl=(\"|\')(.*?)\1/i, 2], 28 | } 29 | end 30 | 31 | puts %{ 32 | 33 | 34 | Engineering Blogs 35 | 36 | 37 | } 38 | 39 | feeds.each do |feed| 40 | pool.process do 41 | 42 | STDERR.puts "Doing #{feed[:title]}" 43 | begin 44 | xml = URI.open(feed[:xmlurl], "User-Agent" => "My RSS Reader").read 45 | rescue Net::OpenTimeout, OpenSSL::SSL::SSLError, SocketError, OpenURI::HTTPError, URI::InvalidURIError => e 46 | STDERR.puts " FAILURE #{e}" 47 | next 48 | end 49 | 50 | xml.sub!(/\<\?.*?\?\>/, '') 51 | begin 52 | pfeed = Feedjira.parse(xml) 53 | rescue Feedjira::NoParserAvailable 54 | STDERR.puts " FAILURE" 55 | next 56 | end 57 | entries = pfeed.entries.map do |entry| 58 | { 59 | published: entry.published, 60 | title: entry.title, 61 | url: entry.url 62 | } 63 | end 64 | 65 | entries = entries.select { |entry| (Time.now - entry[:published]) < (86400 * 7) } 66 | STDERR.puts " Fetched #{pfeed.entries.size} entries, #{entries.size} recent" 67 | 68 | opml_string = %{} 69 | 70 | semaphore.synchronize do 71 | puts opml_string 72 | end 73 | end 74 | end 75 | 76 | pool.shutdown 77 | 78 | puts %{ 79 | 80 | } 81 | -------------------------------------------------------------------------------- /template.erb.original: -------------------------------------------------------------------------------- 1 | 2 | 3 | Engineering Blogs 4 | 5 | 76 | 77 | 78 | 79 |
80 | 81 |

engineering blogs

82 |

from <%= source_count %> sources [OPML]

83 |

last built at <%= Time.now %>

84 | 85 |
86 | <% 87 | old_date = '' 88 | items.each do |item| 89 | d = item[:published].strftime("%Y-%m-%d") %> 90 | 91 | <% if old_date != d %> 92 |

<%= d %>

93 | <% end %> 94 |
95 |
<%= item[:published].strftime("%H") %>h
96 | 100 |
101 | <% 102 | old_date = d 103 | end %> 104 |
105 | 106 | 107 |

Idea inspired by the engineering-blogs list (although it turns out a lot of them are dead/invalid so I cleaned it up and am adding more choices of my own).

108 | 109 |

GitHub repo.

110 |
111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /template.erb: -------------------------------------------------------------------------------- 1 | 2 | 3 | Engineering Blogs 4 | 5 | 84 | 85 | 86 | 87 |
88 | 89 |

<%= source_count %> engineering blogs OPML

90 |

last built at <%= Time.now.getutc.strftime("%Y-%m-%d %H:%M UTC") %>

91 | 92 |
93 | <% 94 | old_date = '' 95 | items.each do |item| 96 | d = item[:published].strftime("%Y-%m-%d") %> 97 | 98 | <% if old_date != d %> 99 |

<%= d %>

100 | <% end %> 101 |
102 | 103 | 104 |
<%= item[:feed] %> 
105 |
106 | <% 107 | old_date = d 108 | end %> 109 |
110 | 111 | 112 | 115 |
116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /crawl.rb: -------------------------------------------------------------------------------- 1 | # Crawl feeds from OPML file and build a page 2 | # to display the resulting items on. 3 | 4 | require 'dotenv' 5 | Dotenv.load 6 | require 'aws-sdk-dynamodb' 7 | require 'feedjira' 8 | require 'thread/pool' 9 | require 'digest/sha1' 10 | require 'open-uri' 11 | 12 | 13 | # ------------------- 14 | # STEP ONE: READ AND PARSE OPML FILE 15 | # ------------------- 16 | 17 | opml = URI.open("http://#{ENV['S3_BUCKET_NAME']}.s3.#{ENV['AWS_DEFAULT_REGION']}.amazonaws.com/#{ENV['S3_BUCKET_NAME']}.opml").read 18 | 19 | # Can't be bothered with the dependencies, so let's go oldschool.. 20 | feeds = opml.scan(//) 21 | feeds.map! do |feed| 22 | { 23 | title: feed[/title=(\"|\')(.*?)\1/i, 2], 24 | xmlurl: feed[/xmlurl=(\"|\')(.*?)\1/i, 2], 25 | htmlurl: feed[/htmlurl=(\"|\')(.*?)\1/i, 2], 26 | } 27 | end 28 | 29 | 30 | # ------------------- 31 | # STEP TWO: CRAWL ALL THE FEEDS AND GET THE ITEMS 32 | # ------------------- 33 | 34 | pool = Thread.pool(20) 35 | semaphore = Mutex.new 36 | dynamodb = Aws::DynamoDB::Client.new 37 | 38 | feeds.each do |feed| 39 | pool.process do 40 | STDERR.puts "Doing #{feed[:title]}" 41 | 42 | # Fetch the feed. If we can't, fail. 43 | begin 44 | xml = URI.open(feed[:xmlurl], :open_timeout => 5, :read_timeout => 10, "User-Agent" => "My RSS Reader" ).read 45 | rescue Net::OpenTimeout, OpenSSL::SSL::SSLError, SocketError, OpenURI::HTTPError, URI::InvalidURIError => e 46 | STDERR.puts " FAILURE #{e}" 47 | next 48 | end 49 | 50 | # Parse the feed. If we can't, fail. 
51 | begin 52 | pfeed = Feedjira.parse(xml) 53 | rescue Feedjira::NoParserAvailable 54 | STDERR.puts " FAILURE" 55 | next 56 | end 57 | 58 | entries = pfeed.entries.map do |entry| 59 | { 60 | published: entry.published, 61 | title: entry.title.to_s.strip, 62 | url: entry.url.to_s.strip, 63 | feed: feed[:title], 64 | feed_site: feed[:htmlurl] 65 | } 66 | end 67 | 68 | # We only want items that are less than 8 days old 69 | entries = entries.select { |entry| (Time.now - entry[:published]) < (86400 * 8) } 70 | 71 | STDERR.puts " Fetched #{pfeed.entries.size} entries, #{entries.size} recent" 72 | 73 | # Put items into DynamoDB 74 | semaphore.synchronize do 75 | entries.each do |entry| 76 | t = entry[:published] 77 | params = { 78 | table_name: ENV['DYNAMODB_TABLE_NAME'], 79 | item: { 80 | date: t.strftime("%Y-%m-%d"), 81 | hash: Digest::SHA1.hexdigest(entry[:url]), 82 | ttl: t.to_i + (86400 * 7) + 3600 83 | } 84 | } 85 | params[:item].merge!(entry) 86 | params[:item][:published] = params[:item][:published].to_s 87 | 88 | begin 89 | dynamodb.put_item(params) 90 | STDERR.puts "Added" 91 | rescue Aws::DynamoDB::Errors::ServiceError => error 92 | STDERR.puts "ERROR" 93 | STDERR.puts error.message 94 | end 95 | end 96 | end 97 | end 98 | end 99 | 100 | pool.shutdown 101 | 102 | STDERR.puts "Finished" 103 | -------------------------------------------------------------------------------- /engblogs.opml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Engineering Blogs 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 
| 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 
433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | --------------------------------------------------------------------------------