├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ └── ruby.yml ├── .gitignore ├── .mergify.yml ├── .prettier.rc ├── .rspec ├── .rubocop.yml ├── .yamllint.yml ├── Gemfile ├── Gemfile.lock ├── LICENSE ├── Makefile ├── README.md ├── bin ├── console ├── generator ├── json_to_xml ├── rspec_changed_configs └── setup ├── html2rss-configs.gemspec ├── lib └── html2rss │ ├── configs.rb │ └── configs │ ├── adfc.de │ └── pressemitteilungen.yml │ ├── apnews.com │ └── hub.yml │ ├── avherald.com │ └── index.yml │ ├── bbc.co.uk │ └── available_episodes.yml │ ├── bbc.com │ └── mundo.yml │ ├── canarianweekly.com │ └── front.yml │ ├── cinemascore.com │ └── index.yml │ ├── cleanenergywire.org │ └── news.yml │ ├── cnet.com │ └── section_sub.yml │ ├── computerbase.de │ └── meistgelesen.yml │ ├── cutle.fish │ └── index.yml │ ├── deraktionaer.de │ └── meistgelesen.yml │ ├── developer.apple.com │ └── tutorials_data_documentation_technotes_json.yml │ ├── dfs.de │ └── pressemitteilungen.yml │ ├── dsw-info.de │ └── presse.yml │ ├── espn.com │ └── f1.yml │ ├── fia.com │ └── documents.yml │ ├── formula1.com │ └── latest.yml │ ├── github.com │ └── releases.yml │ ├── iaapa.org │ └── news.yml │ ├── imdb.com │ └── ratings.yml │ ├── ingenieur.de │ └── karriere_arbeitsleben_heiko_mell.yml │ ├── kinocheck.de │ └── filmstarts.yml │ ├── newyorker.com │ └── magazine.yml │ ├── nomanssky.com │ └── news.yml │ ├── pankow.lebensmittel-kontrollergebnisse.de │ └── search.yml │ ├── phys.org │ └── weekly.yml │ ├── rbb24.de │ └── meistgeklickt.yml │ ├── robinwood.de │ └── aktuelles.yml │ ├── s3.amazonaws.com │ └── popular_movies.yml │ ├── sebastianvettel.de │ └── news.yml │ ├── softwareleadweekly.com │ └── issues.yml │ ├── solarthermalworld.org │ └── news.yml │ ├── spektrum.de │ └── meistgelesen.yml │ ├── spiegel.de │ └── impressum_autor.yml │ ├── stackoverflow.com │ └── hot_network_questions.yml │ ├── steuerzahler.de │ └── news.yml │ ├── stripes.com │ └── index.yml │ ├── support.apple.com │ ├── 
en_gb_ht201222.yml │ └── exchange_repair.yml │ ├── teneriffa-news.com │ └── news.yml │ ├── test.de │ └── archiv.yml │ ├── theguardian.com │ └── international_mostpopular.yml │ ├── thoughtworks.com │ └── insights.yml │ ├── tourismusnetzwerk-brandenburg.de │ └── aktuelle_nachrichten.yml │ ├── version.rb │ └── webentwickler-jobs.de │ └── in.yml └── spec ├── html2rss ├── configs │ ├── adfc.de │ │ └── pressemitteilungen.yml_spec.rb │ ├── apnews.com │ │ └── hub.yml_spec.rb │ ├── avherald.com │ │ └── index.yml_spec.rb │ ├── bbc.co.uk │ │ └── available_episodes.yml_spec.rb │ ├── bbc.com │ │ └── mundo.yml_spec.rb │ ├── canarianweekly.com │ │ └── front.yml_spec.rb │ ├── cinemascore.com │ │ └── index.yml_spec.rb │ ├── cleanenergywire.org │ │ └── news.yml_spec.rb │ ├── cnet.com │ │ └── section_sub.yml_spec.rb │ ├── computerbase.de │ │ └── meistgelesen.yml_spec.rb │ ├── cutle.fish │ │ └── index.yml_spec.rb │ ├── deraktionaer.de │ │ └── meistgelesen.yml_spec.rb │ ├── developer.apple.com │ │ └── tutorials_data_documentation_technotes_json.yml_spec.rb │ ├── dfs.de │ │ └── pressemitteilungen.yml_spec.rb │ ├── dsw-info.de │ │ └── presse.yml_spec.rb │ ├── espn.com │ │ └── f1.yml_spec.rb │ ├── fia.com │ │ └── documents.yml_spec.rb │ ├── formula1.com │ │ └── latest.yml_spec.rb │ ├── github.com │ │ └── releases.yml_spec.rb │ ├── iaapa.org │ │ └── news.yml_spec.rb │ ├── imdb.com │ │ └── ratings.yml_spec.rb │ ├── ingenieur.de │ │ └── karriere_arbeitsleben_heiko_mell.yml_spec.rb │ ├── kinocheck.de │ │ └── filmstarts.yml_spec.rb │ ├── newyorker.com │ │ └── magazine.yml_spec.rb │ ├── nomanssky.com │ │ └── news.yml_spec.rb │ ├── pankow.lebensmittel-kontrollergebnisse.de │ │ └── search.yml_spec.rb │ ├── phys.org │ │ └── weekly.yml_spec.rb │ ├── rbb24.de │ │ └── meistgeklickt.yml_spec.rb │ ├── robinwood.de │ │ └── aktuelles.yml_spec.rb │ ├── s3.amazonaws.com │ │ └── popular_movies.yml_spec.rb │ ├── sebastianvettel.de │ │ └── news.yml_spec.rb │ ├── softwareleadweekly.com │ │ └── 
issues.yml_spec.rb │ ├── solarthermalworld.org │ │ └── news.yml_spec.rb │ ├── spektrum.de │ │ └── meistgelesen.yml_spec.rb │ ├── spiegel.de │ │ └── impressum_autor.yml_spec.rb │ ├── stackoverflow.com │ │ └── hot_network_questions.yml_spec.rb │ ├── steuerzahler.de │ │ └── news.yml_spec.rb │ ├── stripes.com │ │ └── index.yml_spec.rb │ ├── support.apple.com │ │ ├── en_gb_ht201222.yml_spec.rb │ │ └── exchange_repair.yml_spec.rb │ ├── teneriffa-news.com │ │ └── news.yml_spec.rb │ ├── test.de │ │ └── archiv.yml_spec.rb │ ├── theguardian.com │ │ └── international_mostpopular.yml_spec.rb │ ├── thoughtworks.com │ │ └── insights.yml_spec.rb │ ├── tourismusnetzwerk-brandenburg.de │ │ └── aktuelle_nachrichten.yml_spec.rb │ └── webentwickler-jobs.de │ │ └── in.yml_spec.rb └── configs_spec.rb ├── spec_helper.rb └── support ├── helper.rb └── shared_examples └── config.yml_spec.rb /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: gildesmarais 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "bundler" 9 | directory: "/" 10 | allow: 11 | - dependency-type: production 12 | schedule: 13 | interval: "daily" 14 | -------------------------------------------------------------------------------- /.github/workflows/ruby.yml: -------------------------------------------------------------------------------- 1 | name: Ruby 2 | 3 | "on": 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: actions/setup-node@v2 16 | with: 17 | node-version: "20" 18 | 19 | - name: Install dependencies 20 | run: | 21 | sudo apt-get install -yqq yamllint 22 | 23 | - name: Set up Ruby 24 | uses: ruby/setup-ruby@v1 25 | with: 26 | ruby-version: 3.3 27 | bundler-cache: true 28 | 29 | - name: Run linters 30 | run: make lint 31 | 32 | test: 33 | strategy: 34 | fail-fast: false 35 | matrix: 36 | ruby: ["3.3"] 37 | 38 | runs-on: ubuntu-latest 39 | 40 | steps: 41 | - uses: actions/checkout@v2 42 | 43 | - name: Set up Ruby 44 | uses: ruby/setup-ruby@v1 45 | with: 46 | ruby-version: ${{ matrix.ruby }} 47 | bundler-cache: true 48 | 49 | - name: Run tests 50 | run: make test 51 | 52 | test_changed_configs: 53 | runs-on: ubuntu-latest 54 | 55 | steps: 56 | - uses: actions/checkout@v2 57 | 58 | - name: Set up Ruby 59 | uses: ruby/setup-ruby@v1 60 | with: 61 | ruby-version: 3.3 62 | bundler-cache: true 63 | 64 | - name: setup remote origin 65 | run: >- 66 | git remote set-url origin 67 | https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} 68 | 69 | - name: Test changed configs with fetching 70 | run: | 71 | git fetch origin master --depth 1 72 | make test-fetch-changed-configs 73 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /_yardoc/ 4 | /coverage/ 5 | /doc/ 6 | /pkg/ 7 | /spec/reports/ 8 | /tmp/ 9 | /node_modules/ 10 | 11 | # rspec failure tracking 12 | .rspec_status 13 | -------------------------------------------------------------------------------- /.mergify.yml: -------------------------------------------------------------------------------- 1 | queue_rules: 2 | - name: dependabot 3 | queue_conditions: 4 | - author=dependabot[bot] 5 | merge_conditions: 6 | - author=dependabot[bot] 7 | - status-success=test 8 | - status-success=lint 9 | - base=master 10 | merge_method: squash 11 | 12 | pull_request_rules: 13 | - name: refactored queue action rule 14 | conditions: [] 15 | actions: 16 | queue: 17 | -------------------------------------------------------------------------------- /.prettier.rc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 80, 3 | "trailingComma": "none", 4 | "tabWidth": 2, 5 | "useTabs": false, 6 | "bracketSpacing": true, 7 | "semi": false, 8 | "singleQuote": true 9 | } 10 | 11 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format p 2 | --color 3 | --require spec_helper 4 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | require: 2 | - rubocop-rspec 3 | - rubocop-performance 4 | 5 | AllCops: 6 | DisplayCopNames: true 7 | NewCops: enable 8 | Exclude: 9 | - vendor/**/* 10 | 11 | Layout/ClassStructure: 12 | Enabled: true 13 | 14 | RSpec/DescribeClass: 15 | Exclude: 16 | - "spec/html2rss/configs/**/*.yml_spec.rb" 17 | 18 | 
Naming/RescuedExceptionsVariableName: 19 | PreferredName: error 20 | -------------------------------------------------------------------------------- /.yamllint.yml: -------------------------------------------------------------------------------- 1 | extends: default 2 | 3 | rules: 4 | document-start: disable 5 | line-length: disable 6 | 7 | indentation: 8 | spaces: 2 9 | indent-sequences: consistent 10 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'https://rubygems.org' 4 | 5 | git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } 6 | 7 | gem 'html2rss', github: 'html2rss/html2rss', branch: :master 8 | 9 | group :development do 10 | # gem 'html2rss-generator', path: '../generator' 11 | gem 'html2rss-generator', github: 'html2rss/generator', branch: :main 12 | 13 | gem 'nokogiri' 14 | gem 'rspec', '~> 3.0' 15 | gem 'rubocop' 16 | gem 'rubocop-performance' 17 | gem 'rubocop-rspec' 18 | end 19 | 20 | gemspec 21 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GIT 2 | remote: https://github.com/html2rss/generator 3 | revision: 05bca799648034e8937e08baaa47b03af4aed5b0 4 | branch: main 5 | specs: 6 | html2rss-generator (0.1.0) 7 | faraday 8 | hashie 9 | htmlbeautifier 10 | tty-markdown 11 | tty-prompt 12 | 13 | GIT 14 | remote: https://github.com/html2rss/html2rss 15 | revision: 70540c7d2accdfab85fb52d3839b31a615ffe3b3 16 | branch: master 17 | specs: 18 | html2rss (0.17.0) 19 | addressable (~> 2.7) 20 | dry-validation 21 | faraday (> 2.0.1, < 3.0) 22 | faraday-follow_redirects 23 | kramdown 24 | mime-types (> 3.0) 25 | nokogiri (>= 1.10, < 2.0) 26 | parallel 27 | puppeteer-ruby 28 | regexp_parser 29 | reverse_markdown (~> 3.0) 30 | rss 31 | sanitize 
32 | thor 33 | tzinfo 34 | zeitwerk 35 | 36 | PATH 37 | remote: . 38 | specs: 39 | html2rss-configs (0.2.0) 40 | html2rss 41 | 42 | GEM 43 | remote: https://rubygems.org/ 44 | specs: 45 | addressable (2.8.7) 46 | public_suffix (>= 2.0.2, < 7.0) 47 | ast (2.4.3) 48 | base64 (0.2.0) 49 | bigdecimal (3.1.9) 50 | concurrent-ruby (1.3.5) 51 | crass (1.0.6) 52 | diff-lcs (1.6.2) 53 | dry-configurable (1.3.0) 54 | dry-core (~> 1.1) 55 | zeitwerk (~> 2.6) 56 | dry-core (1.1.0) 57 | concurrent-ruby (~> 1.0) 58 | logger 59 | zeitwerk (~> 2.6) 60 | dry-inflector (1.2.0) 61 | dry-initializer (3.2.0) 62 | dry-logic (1.6.0) 63 | bigdecimal 64 | concurrent-ruby (~> 1.0) 65 | dry-core (~> 1.1) 66 | zeitwerk (~> 2.6) 67 | dry-schema (1.14.1) 68 | concurrent-ruby (~> 1.0) 69 | dry-configurable (~> 1.0, >= 1.0.1) 70 | dry-core (~> 1.1) 71 | dry-initializer (~> 3.2) 72 | dry-logic (~> 1.5) 73 | dry-types (~> 1.8) 74 | zeitwerk (~> 2.6) 75 | dry-types (1.8.2) 76 | bigdecimal (~> 3.0) 77 | concurrent-ruby (~> 1.0) 78 | dry-core (~> 1.0) 79 | dry-inflector (~> 1.0) 80 | dry-logic (~> 1.4) 81 | zeitwerk (~> 2.6) 82 | dry-validation (1.11.1) 83 | concurrent-ruby (~> 1.0) 84 | dry-core (~> 1.1) 85 | dry-initializer (~> 3.2) 86 | dry-schema (~> 1.14) 87 | zeitwerk (~> 2.6) 88 | faraday (2.13.1) 89 | faraday-net_http (>= 2.0, < 3.5) 90 | json 91 | logger 92 | faraday-follow_redirects (0.3.0) 93 | faraday (>= 1, < 3) 94 | faraday-net_http (3.4.0) 95 | net-http (>= 0.5.0) 96 | hashie (5.0.0) 97 | htmlbeautifier (1.4.3) 98 | json (2.12.2) 99 | kramdown (2.5.1) 100 | rexml (>= 3.3.9) 101 | language_server-protocol (3.17.0.5) 102 | lint_roller (1.1.0) 103 | logger (1.7.0) 104 | mime-types (3.7.0) 105 | logger 106 | mime-types-data (~> 3.2025, >= 3.2025.0507) 107 | mime-types-data (3.2025.0527) 108 | net-http (0.6.0) 109 | uri 110 | nokogiri (1.18.8-arm64-darwin) 111 | racc (~> 1.4) 112 | nokogiri (1.18.8-x86_64-darwin) 113 | racc (~> 1.4) 114 | nokogiri (1.18.8-x86_64-linux-gnu) 115 | racc (~> 
1.4) 116 | parallel (1.27.0) 117 | parser (3.3.8.0) 118 | ast (~> 2.4.1) 119 | racc 120 | pastel (0.8.0) 121 | tty-color (~> 0.5) 122 | prism (1.4.0) 123 | public_suffix (6.0.2) 124 | puppeteer-ruby (0.45.6) 125 | concurrent-ruby (>= 1.1, < 1.4) 126 | mime-types (>= 3.0) 127 | websocket-driver (>= 0.6.0) 128 | racc (1.8.1) 129 | rainbow (3.1.1) 130 | regexp_parser (2.10.0) 131 | reverse_markdown (3.0.0) 132 | nokogiri 133 | rexml (3.4.1) 134 | rouge (4.5.2) 135 | rspec (3.13.1) 136 | rspec-core (~> 3.13.0) 137 | rspec-expectations (~> 3.13.0) 138 | rspec-mocks (~> 3.13.0) 139 | rspec-core (3.13.4) 140 | rspec-support (~> 3.13.0) 141 | rspec-expectations (3.13.5) 142 | diff-lcs (>= 1.2.0, < 2.0) 143 | rspec-support (~> 3.13.0) 144 | rspec-mocks (3.13.5) 145 | diff-lcs (>= 1.2.0, < 2.0) 146 | rspec-support (~> 3.13.0) 147 | rspec-support (3.13.4) 148 | rss (0.3.1) 149 | rexml 150 | rubocop (1.75.8) 151 | json (~> 2.3) 152 | language_server-protocol (~> 3.17.0.2) 153 | lint_roller (~> 1.1.0) 154 | parallel (~> 1.10) 155 | parser (>= 3.3.0.2) 156 | rainbow (>= 2.2.2, < 4.0) 157 | regexp_parser (>= 2.9.3, < 3.0) 158 | rubocop-ast (>= 1.44.0, < 2.0) 159 | ruby-progressbar (~> 1.7) 160 | unicode-display_width (>= 2.4.0, < 4.0) 161 | rubocop-ast (1.44.1) 162 | parser (>= 3.3.7.2) 163 | prism (~> 1.4) 164 | rubocop-performance (1.25.0) 165 | lint_roller (~> 1.1) 166 | rubocop (>= 1.75.0, < 2.0) 167 | rubocop-ast (>= 1.38.0, < 2.0) 168 | rubocop-rspec (3.6.0) 169 | lint_roller (~> 1.1) 170 | rubocop (~> 1.72, >= 1.72.1) 171 | ruby-progressbar (1.13.0) 172 | sanitize (7.0.0) 173 | crass (~> 1.0.2) 174 | nokogiri (>= 1.16.8) 175 | strings (0.2.1) 176 | strings-ansi (~> 0.2) 177 | unicode-display_width (>= 1.5, < 3.0) 178 | unicode_utils (~> 1.4) 179 | strings-ansi (0.2.0) 180 | thor (1.3.2) 181 | tty-color (0.6.0) 182 | tty-cursor (0.7.1) 183 | tty-markdown (0.7.2) 184 | kramdown (>= 1.16.2, < 3.0) 185 | pastel (~> 0.8) 186 | rouge (>= 3.14, < 5.0) 187 | strings (~> 0.2.0) 188 
| tty-color (~> 0.5) 189 | tty-screen (~> 0.8) 190 | tty-prompt (0.23.1) 191 | pastel (~> 0.8) 192 | tty-reader (~> 0.8) 193 | tty-reader (0.9.0) 194 | tty-cursor (~> 0.7) 195 | tty-screen (~> 0.8) 196 | wisper (~> 2.0) 197 | tty-screen (0.8.2) 198 | tzinfo (2.0.6) 199 | concurrent-ruby (~> 1.0) 200 | unicode-display_width (2.6.0) 201 | unicode_utils (1.4.0) 202 | uri (1.0.3) 203 | websocket-driver (0.8.0) 204 | base64 205 | websocket-extensions (>= 0.1.0) 206 | websocket-extensions (0.1.5) 207 | wisper (2.0.1) 208 | zeitwerk (2.7.3) 209 | 210 | PLATFORMS 211 | arm64-darwin-23 212 | arm64-darwin-24 213 | x86_64-darwin-20 214 | x86_64-linux 215 | 216 | DEPENDENCIES 217 | html2rss! 218 | html2rss-configs! 219 | html2rss-generator! 220 | nokogiri 221 | rspec (~> 3.0) 222 | rubocop 223 | rubocop-performance 224 | rubocop-rspec 225 | 226 | BUNDLED WITH 227 | 2.6.6 228 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 Gil Desmarais 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | default: lint test 2 | 3 | lint: 4 | yamllint lib/html2rss/configs/ .github/ 5 | bundle exec rubocop -P -f quiet 6 | npx prettier --check lib/**/*.yml .github/**/*.yml README.md 7 | 8 | test: 9 | bundle exec rspec 10 | 11 | test-fetch-changed-configs: 12 | bin/rspec_changed_configs 13 | 14 | test-fetch-all-configs: 15 | bundle exec rspec --tag fetch spec/html2rss/configs 16 | 17 | test-all: test test-fetch-all-configs 18 | 19 | lintfix: 20 | bundle exec rubocop -a 21 | npx prettier --write lib/**/*.yml .github/**/*.yml README.md 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![html2rss logo](https://github.com/html2rss/html2rss/raw/master/support/logo.png) 2 | 3 | # html2rss-configs 4 | 5 | ✌️ This repository contains [`html2rss`](https://github.com/html2rss/html2rss) _feed configs_ for many websites. 6 | 👉 Find all _feed configs_ in the [`configs/` directory](https://github.com/html2rss/html2rss-configs/tree/master/lib/html2rss/configs). 7 | ☝️ A handy usage method is via [`html2rss-web`](https://github.com/html2rss/html2rss-web). 8 | 💪 Contributions are more than welcome! 9 | [Fork this repository](https://help.github.com/en/github/getting-started-with-github/fork-a-repo), 10 | add your _feed config_ and 11 | [create a pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). 
12 | 13 | ## Contributing 14 | 15 | The html2rss "ecosystem" is a community project. We welcome contributions of all kinds. This includes new feed configs, suggesting and implementing features, providing bug fixes, documentation improvements, and any other kind of help. 16 | 17 | ### Adding a new feed config 18 | 19 | Which way you choose to add a new feed config is up to you. You can do it manually or risk the "wizard-like" generator. Please [submit a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork)! 20 | 21 | After you're done, you can test your feed config by running `bundle exec html2rss feed lib/html2rss/configs//.yml`. 22 | 23 | #### Preferred way: manually 24 | 25 | 1. Fork this repo and run `bundle install` (you need to have Ruby >= 3.1 installed). 26 | 2. Create a new folder and file following this convention: `lib/html2rss/configs//.yml` 27 | 3. Create the feed config in the `.yml` file. 28 | 4. Add this spec to the `spec/html2rss/configs//_spec.rb` file. 29 | 30 | ```ruby 31 | RSpec.describe '/' do 32 | include_examples 'config.yml', described_class 33 | end 34 | ``` 35 | 36 | #### Using the "wizard-like" generator 37 | 38 | There was an attempt to build a wizard-like, TUI-based generator for that! 🆒 It hasn't seen much love, though, so it might get you going or it might crash in the middle of the process. 39 | 40 | 1. Fork this repo and run `bundle install` (you need to have Ruby >= 3.1 installed). 41 | 2. Start the generator by typing: `bin/generator` 42 | 3. Build your feed config and answer 'y' in the last step to create the files. 43 | 4. Optionally, edit the created files. Read [`html2rss`'s README](https://github.com/html2rss/html2rss/blob/master/README.md) to see what is possible or browse [existing configs](https://github.com/html2rss/html2rss-configs/tree/master/lib/html2rss/configs) for inspiration. 44 | 5. 
To test, run: 45 | `bundle exec html2rss feed lib/html2rss/configs/domainname.tld/whatever` 46 | 47 | ## Using dynamic parameters in `channel` attributes 48 | 49 | When you're using dynamic parameters, you have to provide the parameters to the spec, too: 50 | 51 | ```ruby 52 | include_examples 'config.yml', 'domainname.tld/whatever.yml', id: 42 53 | ``` 54 | 55 | CLI usage: 56 | 57 | ```sh 58 | bundle exec html2rss feed lib/html2rss/configs/domainname.tld/whatever id=42 59 | ``` 60 | 61 | ## Programmatic usage 62 | 63 | Add to your Gemfile: 64 | 65 | ```ruby 66 | gem 'html2rss-configs', git: 'https://github.com/html2rss/html2rss-configs.git' 67 | ``` 68 | 69 | Use it in your code: 70 | 71 | ```ruby 72 | require 'html2rss/configs' 73 | 74 | config = Html2rss::Configs.find_by_name('domainname.tld/whatever') 75 | ``` 76 | 77 | This will return the _feed config_. 78 | 79 | ## Usage with `html2rss-web` 80 | 81 | If you're running [`html2rss-web`](https://github.com/html2rss/html2rss-web), 82 | you have nothing more to do! 🎉 Just request them from your instance at path: `/.rss` and you'll be served the RSS. 83 | 84 | ## CI: Building on the CI 85 | 86 | Modifying existing or adding new _feed configs_ will trigger the CI to fetch the feed 87 | and check for the presence of feed items. 88 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | require 'bundler/setup' 5 | require 'html2rss/configs' 6 | 7 | # You can add fixtures and/or initialization code here to make experimenting 8 | # with your gem easier. You can also use a different console, if you like. 9 | 10 | # (If you use this, don't forget to add pry to your Gemfile!) 
11 | # require "pry" 12 | # Pry.start 13 | 14 | require 'irb' 15 | IRB.start(__FILE__) 16 | -------------------------------------------------------------------------------- /bin/generator: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | require 'bundler/setup' 5 | require 'html2rss/generator' 6 | 7 | Html2rss::Generator.start 8 | -------------------------------------------------------------------------------- /bin/json_to_xml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | require 'bundler/setup' 5 | require 'html2rss/item' 6 | require 'html2rss/utils' 7 | 8 | raise ArgumentError unless ARGV[0].is_a?(String) 9 | 10 | puts Html2rss::Utils.request_body_from_url(ARGV[0], convert_json_to_xml: true) 11 | -------------------------------------------------------------------------------- /bin/rspec_changed_configs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | changed_files = `git diff --name-only origin/master | grep 'lib/html2rss/configs/.*/.*.yml'`.split("\n") 5 | 6 | specs_to_run = changed_files.filter_map do |file| 7 | filepath = File.expand_path File.join(__dir__, '..', file) 8 | 9 | "#{file.gsub('lib/', 'spec/')}_spec.rb" if File.exist?(filepath) 10 | end 11 | 12 | if specs_to_run.count.positive? 
13 | exec "bundle exec rspec --tag fetch #{specs_to_run.join(' ')}" 14 | else 15 | exit 0 16 | end 17 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /html2rss-configs.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | lib = File.expand_path('lib', __dir__) 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 5 | require 'html2rss/configs/version' 6 | 7 | Gem::Specification.new do |spec| 8 | spec.name = 'html2rss-configs' 9 | spec.version = Html2rss::Configs::VERSION 10 | spec.authors = ['Gil Desmarais'] 11 | spec.email = %w[html2rss-configs@desmarais.de] 12 | 13 | spec.summary = 'Collection of ready-to-use html2rss configs.' 14 | spec.description = 'Configs which contain information how to generate RSS items from websites.' 15 | spec.homepage = 'https://github.com/html2rss/html2rss-configs' 16 | spec.license = 'MIT' 17 | spec.required_ruby_version = '>= 3.1' 18 | 19 | # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host' 20 | # to allow pushing to a single host or delete this section to allow pushing to any host. 21 | if spec.respond_to?(:metadata) 22 | spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'" 23 | 24 | spec.metadata['homepage_uri'] = spec.homepage 25 | spec.metadata['source_code_uri'] = 'https://github.com/html2rss/html2rss-configs' 26 | spec.metadata['rubygems_mfa_required'] = 'true' 27 | 28 | # spec.metadata['changelog_uri'] = "TODO: Put your gem's CHANGELOG.md URL here." 
29 | else 30 | raise 'RubyGems 2.0 or newer is required to protect against ' \ 31 | 'public gem pushes.' 32 | end 33 | 34 | # Specify which files should be added to the gem when it is released. 35 | # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 36 | spec.files = Dir.chdir(File.expand_path(__dir__)) do 37 | `git ls-files -z`.split("\x0").reject do |f| 38 | f[0] == '.' || f.match(%r{^(bin|test|spec|features|Makefile|Gemfile)/?}) 39 | end 40 | end 41 | spec.bindir = 'exe' 42 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 43 | spec.require_paths = ['lib'] 44 | 45 | spec.add_dependency 'html2rss' 46 | end 47 | -------------------------------------------------------------------------------- /lib/html2rss/configs.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'html2rss/configs/version' 4 | require 'yaml' 5 | 6 | module Html2rss 7 | ## 8 | # The namespace for this gem 9 | module Configs 10 | class Error < Html2rss::Error; end 11 | class ConfigNotFound < Html2rss::Configs::Error; end 12 | 13 | ## 14 | # @return [Array] 15 | def self.file_names 16 | @file_names ||= Dir[File.join(__dir__, '**', '*.yml')].freeze 17 | end 18 | 19 | ## 20 | # @param name [String] the name of the config to find. 
format: `domainname/name` 21 | # @return [Hash] the hash to create a Html2rss::Config 22 | def self.find_by_name(name) 23 | raise 'name must be a string' unless name.is_a?(String) 24 | raise 'name must be in folder/file format' unless name.include?('/') 25 | 26 | name = "#{name}.yml" unless name.end_with?('.yml') 27 | 28 | file_name = file_names.find { |f| f.end_with?(name) } 29 | 30 | raise ConfigNotFound unless file_name 31 | 32 | YAML.safe_load(File.open(file_name), symbolize_names: true).freeze 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/html2rss/configs/adfc.de/pressemitteilungen.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.adfc.de/presse/pressemitteilungen/ 3 | time_zone: Europe/Berlin 4 | ttl: 720 5 | language: de 6 | selectors: 7 | items: 8 | selector: "#articleteasercontainer-columns .articleteaser" 9 | title: 10 | selector: ".articleteaser-header-container a" 11 | url: 12 | selector: "a:first" 13 | extractor: "href" 14 | description: 15 | selector: ".articleteaser-header-container p" 16 | -------------------------------------------------------------------------------- /lib/html2rss/configs/apnews.com/hub.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://apnews.com/%
s 4 | language: en 5 | ttl: 120 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: ".PagePromo" 10 | title: 11 | selector: h3 12 | url: 13 | selector: a:first 14 | extractor: href 15 | description: 16 | selector: .PagePromoContentIcons-text 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/avherald.com/index.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://avherald.com/ 4 | language: en 5 | ttl: 120 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: "table table a" 10 | title: 11 | selector: span 12 | url: 13 | extractor: href 14 | -------------------------------------------------------------------------------- /lib/html2rss/configs/bbc.co.uk/available_episodes.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.bbc.co.uk/programmes/%s/episodes/player 3 | time_zone: UTC 4 | ttl: 720 5 | selectors: 6 | items: 7 | selector: ".highlight-box-wrapper > div" 8 | title: 9 | selector: ".programme__title" 10 | url: 11 | selector: ".programme__titles > a" 12 | extractor: "href" 13 | description: 14 | selector: ".programme__synopsis" 15 | -------------------------------------------------------------------------------- /lib/html2rss/configs/bbc.com/mundo.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://www.bbc.com/mundo 4 | language: es 5 | ttl: 360 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: '[data-testid="topic-promos"] > li' 10 | title: 11 | selector: a 12 | url: 13 | selector: a:first 14 | extractor: href 15 | description: 16 | selector: p 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/canarianweekly.com/front.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | 
url: https://www.canarianweekly.com/ 3 | time_zone: Europe/London 4 | ttl: 720 5 | language: en 6 | selectors: 7 | items: 8 | selector: ".article__entry" 9 | title: 10 | selector: "h5 > a" 11 | url: 12 | selector: "h5 > a" 13 | extractor: "href" 14 | -------------------------------------------------------------------------------- /lib/html2rss/configs/cinemascore.com/index.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://webapp.cinemascore.com/guest/surveys 3 | ttl: 720 4 | json: true 5 | time_zone: America/Los_Angeles 6 | selectors: 7 | items: 8 | selector: "new > array > object, recent > array > object" 9 | title: 10 | selector: "title" 11 | post_process: 12 | - name: "template" 13 | string: "%{self} (%{grade}) [Rated %{mpaa_rating}] (%{year})" 14 | grade: 15 | selector: "grade" 16 | movie_title: 17 | selector: "movie_title" 18 | url: 19 | post_process: 20 | - name: "template" 21 | string: "https://www.metacritic.com/search/movie/%{movie_title}/results" 22 | mpaa_rating: 23 | selector: "mpaa_rating" 24 | distributor: 25 | selector: "distributor" 26 | year: 27 | selector: "year" 28 | categories: 29 | - mpaa_rating 30 | - grade 31 | - year 32 | description: 33 | post_process: 34 | - name: template 35 | string: | 36 | # %{movie_title} 37 | 38 | Grade: %{grade} 39 | 40 | MPAA Rating: %{mpaa_rating} 41 | 42 | Distributor: %{distributor} 43 | 44 | Year: %{year} 45 | - name: markdown_to_html 46 | -------------------------------------------------------------------------------- /lib/html2rss/configs/cleanenergywire.org/news.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.cleanenergywire.org/news-archive 3 | time_zone: "Europe/Berlin" 4 | ttl: 360 5 | selectors: 6 | items: 7 | selector: ".view-content article" 8 | title: 9 | selector: "h3 a" 10 | url: 11 | selector: "h3 a" 12 | extractor: "href" 13 | published_at: 14 | selector: 
".date-display-single" 15 | post_process: 16 | - name: parse_time 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/cnet.com/section_sub.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://www.cnet.com/%
s/%s/ 4 | language: en 5 | ttl: 360 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: ".c-storiesNeonHighlightsCard" 10 | title: 11 | selector: ".g-text-bold" 12 | url: 13 | selector: a:first 14 | extractor: href 15 | description: 16 | selector: span 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/computerbase.de/meistgelesen.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | title: "computerbase.de: meistgelesen" 3 | url: https://www.computerbase.de 4 | time_zone: Europe/Berlin 5 | ttl: 360 6 | language: de 7 | selectors: 8 | items: 9 | selector: ".charts > li" 10 | title: 11 | selector: ".charts__title" 12 | post_process: 13 | - name: "template" 14 | string: "%{context}: %{self}" 15 | context: 16 | selector: ".charts__context" 17 | url: 18 | selector: ".charts__link" 19 | extractor: "href" 20 | -------------------------------------------------------------------------------- /lib/html2rss/configs/cutle.fish/index.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://cutle.fish/ 4 | ttl: 360 5 | time_zone: UTC 6 | selectors: 7 | items: 8 | selector: main > div > div 9 | title: 10 | selector: a 11 | url: 12 | selector: a 13 | extractor: href 14 | description: 15 | selector: a 16 | -------------------------------------------------------------------------------- /lib/html2rss/configs/deraktionaer.de/meistgelesen.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | title: "deraktionaer.de: meistgelesen" 3 | url: https://deraktionaer.de/ 4 | time_zone: Europe/Berlin 5 | ttl: 360 6 | language: de 7 | selectors: 8 | items: 9 | selector: "#most-viewed ol > li" 10 | title: 11 | selector: "> a" 12 | url: 13 | selector: "> a" 14 | extractor: "href" 15 | isin: 16 | selector: ".stock-info" 17 | extractor: attribute 18 | attribute: 
"data-quote" 19 | categories: 20 | - isin 21 | -------------------------------------------------------------------------------- /lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | json: true 4 | url: https://developer.apple.com/tutorials/data/documentation/Technotes.json 5 | ttl: 360 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: "references > object > * > object:has(url)" 10 | title: 11 | selector: title 12 | url: 13 | selector: url 14 | post_process: 15 | - name: parse_uri 16 | description: 17 | selector: abstract text 18 | -------------------------------------------------------------------------------- /lib/html2rss/configs/dfs.de/pressemitteilungen.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.dfs.de/homepage/de/medien/presse/ 3 | time_zone: Europe/Berlin 4 | ttl: 1440 5 | language: de 6 | selectors: 7 | items: 8 | selector: ".article-teaser-list article" 9 | title: 10 | selector: ".headline" 11 | url: 12 | selector: "a" 13 | extractor: "href" 14 | published_at: 15 | selector: "time" 16 | extractor: "attribute" 17 | attribute: "datetime" 18 | post_process: 19 | - name: "parse_time" 20 | -------------------------------------------------------------------------------- /lib/html2rss/configs/dsw-info.de/presse.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.dsw-info.de/presse 3 | time_zone: Europe/Berlin 4 | ttl: 720 5 | language: de 6 | selectors: 7 | items: 8 | selector: ".cesprop-0 .container.zw-01 .frame:nth-child(2) ul > li" 9 | title: 10 | selector: "a" 11 | url: 12 | selector: "a" 13 | extractor: "href" 14 | published_at: 15 | selector: ".vd small" 16 | post_process: 17 | - name: parse_time 18 | description: 19 | selector: ".bt small" 20 | 
-------------------------------------------------------------------------------- /lib/html2rss/configs/espn.com/f1.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.espn.com/f1/ 3 | time_zone: UTC 4 | ttl: 60 5 | selectors: 6 | items: 7 | selector: ".headlineStack__list > li" 8 | title: 9 | selector: "a" 10 | url: 11 | selector: "a" 12 | extractor: "href" 13 | -------------------------------------------------------------------------------- /lib/html2rss/configs/fia.com/documents.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://www.fia.com/documents/championships/fia-formula-one-world-championship-14/season/season-2025-2071 4 | ttl: 360 5 | time_zone: UTC 6 | selectors: 7 | items: 8 | selector: ".decision-document-list .document-row" 9 | title: 10 | selector: ".title" 11 | url: 12 | selector: a 13 | extractor: href 14 | description: 15 | selector: a 16 | extractor: html 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/formula1.com/latest.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.formula1.com/en/latest/all.html 3 | time_zone: UTC 4 | ttl: 120 5 | selectors: 6 | items: 7 | selector: "#article-list > li" 8 | title: 9 | selector: "p" 10 | url: 11 | selector: "a" 12 | extractor: "href" 13 | categories: 14 | - category_a 15 | - category_b 16 | category_a: 17 | selector: "figcaption>span:first" 18 | category_b: 19 | selector: "figcaption>span:last" 20 | -------------------------------------------------------------------------------- /lib/html2rss/configs/github.com/releases.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://github.com/%s/%s/releases 3 | time_zone: UTC 4 | ttl: 720 5 | description: "Releases of %s/%s on github.com." 
6 | selectors: 7 | items: 8 | selector: ".repository-content section" 9 | title: 10 | selector: "h2" 11 | url: 12 | selector: "a" 13 | extractor: "href" 14 | description: 15 | selector: '[data-test-selector="body-content"]' 16 | extractor: "html" 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/iaapa.org/news.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.iaapa.org/news 3 | time_zone: UTC 4 | ttl: 720 5 | selectors: 6 | items: 7 | selector: ".views-row > article" 8 | title: 9 | selector: h3 10 | description: 11 | selector: ".event-card__summary" 12 | url: 13 | selector: "a" 14 | extractor: "href" 15 | -------------------------------------------------------------------------------- /lib/html2rss/configs/imdb.com/ratings.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.imdb.com/user/%s/ratings 3 | time_zone: UTC 4 | ttl: 1440 5 | selectors: 6 | items: 7 | selector: "li.ipc-metadata-list-summary-item" 8 | title: 9 | selector: ".ipc-title__text" 10 | post_process: 11 | - name: gsub 12 | pattern: "/^(\\d+.)\\s/" 13 | replacement: "" 14 | - name: template 15 | string: 'Rated: "%{self}" %{user_rating} [IMDb: %{global_rating}]' 16 | url: 17 | selector: "a.ipc-title-link-wrapper" 18 | extractor: "href" 19 | description: 20 | selector: ".ipc-html-content-inner-div" 21 | global_rating: 22 | selector: ".ipc-rating-star--imdb .ipc-rating-star--rating" 23 | user_rating: 24 | selector: ".dli-ratings-container .ipc-rating-star--otherUser span" 25 | enclosure: 26 | selector: ".ipc-image" 27 | extractor: "attribute" 28 | attribute: "src" 29 | -------------------------------------------------------------------------------- /lib/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml: -------------------------------------------------------------------------------- 1 | --- 2 
| channel: 3 | url: https://www.ingenieur.de/karriere/arbeitsleben/heiko-mell/ 4 | language: de-DE 5 | ttl: 360 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: ".section > .ing-teaser" 10 | title: 11 | selector: ".ing-teaser__headline" 12 | url: 13 | selector: a:first 14 | extractor: href 15 | description: 16 | selector: ".ing-teaser__excerpt" 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/kinocheck.de/filmstarts.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://kinocheck.de/filmstarts 3 | time_zone: Europe/Berlin 4 | ttl: 1440 5 | language: de 6 | selectors: 7 | items: 8 | selector: ".list-movies-wide-item .wrapper-content" 9 | title: 10 | selector: "h2" 11 | post_process: 12 | - name: "template" 13 | string: "%{self} (%{start_date})" 14 | url: 15 | selector: "a" 16 | extractor: "href" 17 | description: 18 | selector: ".information" 19 | extractor: "html" 20 | start_date: 21 | selector: ".release-date .highlight" 22 | -------------------------------------------------------------------------------- /lib/html2rss/configs/newyorker.com/magazine.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://www.newyorker.com/magazine 4 | language: en 5 | ttl: 360 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: .summary-item 10 | title: 11 | selector: h3 12 | url: 13 | selector: a:first 14 | extractor: href 15 | description: 16 | selector: .summary-item__dek 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/nomanssky.com/news.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://www.nomanssky.com/news/ 4 | language: en-GB 5 | ttl: 360 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: article 10 | title: 11 | selector: 
".post-title" 12 | url: 13 | selector: a.view-article 14 | extractor: href 15 | description: 16 | selector: p:first 17 | post_process: 18 | - name: substring 19 | start: 0 20 | end: -14 21 | -------------------------------------------------------------------------------- /lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://pankow.lebensmittel-kontrollergebnisse.de/Search 4 | language: de 5 | ttl: 360 6 | time_zone: Europe/Berlin 7 | selectors: 8 | items: 9 | selector: main .card 10 | title: 11 | selector: h5 12 | post_process: 13 | - name: template 14 | string: | 15 | %{self}: %{ergebnis} (%{punkte}/80 Punkte) 16 | url: 17 | selector: a 18 | extractor: href 19 | description: 20 | selector: ".row" 21 | extractor: html 22 | post_process: 23 | - name: html_to_markdown 24 | - name: markdown_to_html 25 | ergebnis: 26 | selector: ".bewertung-panel .row:nth-child(3)" 27 | post_process: 28 | - name: substring 29 | start: 9 30 | punkte: 31 | selector: ".bewertung-panel .row:nth-child(2)" 32 | post_process: 33 | - name: substring 34 | start: 10 35 | published_at: 36 | selector: ".bewertung-panel .row:nth-child(1)" 37 | post_process: 38 | - name: substring 39 | start: -10 40 | - name: parse_time 41 | categories: 42 | - ergebnis 43 | - punkte 44 | -------------------------------------------------------------------------------- /lib/html2rss/configs/phys.org/weekly.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://phys.org/weekly-news/ 3 | time_zone: Europe/London 4 | ttl: 1440 5 | selectors: 6 | items: 7 | selector: ".sorted-news-list .sorted-article-content" 8 | title: 9 | selector: "h4" 10 | category: 11 | selector: ".text-info" 12 | categories: 13 | - category 14 | url: 15 | selector: ".news-link" 16 | extractor: "href" 17 | 
-------------------------------------------------------------------------------- /lib/html2rss/configs/rbb24.de/meistgeklickt.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | title: "rbb24.de: meistgeklickt" 3 | url: https://rbb24.de/ 4 | time_zone: Europe/Berlin 5 | ttl: 30 6 | language: de 7 | selectors: 8 | items: 9 | selector: ".tabmodul_container > li:last-child > ul > li" 10 | title: 11 | selector: "a" 12 | url: 13 | selector: "a" 14 | extractor: "href" 15 | -------------------------------------------------------------------------------- /lib/html2rss/configs/robinwood.de/aktuelles.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.robinwood.de/was-gibt-es-neues/aktuelles 3 | time_zone: Europe/Berlin 4 | ttl: 720 5 | language: de 6 | selectors: 7 | items: 8 | selector: "article[data-history-node-id]" 9 | title: 10 | selector: "h2" 11 | url: 12 | selector: "a" 13 | extractor: "href" 14 | description: 15 | selector: ".teaser-text" 16 | -------------------------------------------------------------------------------- /lib/html2rss/configs/s3.amazonaws.com/popular_movies.yml: -------------------------------------------------------------------------------- 1 | # This generates a RSS of the daily updated 2 | # https://github.com/sjlu/popular-movies 3 | channel: 4 | url: https://s3.amazonaws.com/popular-movies/movies.json 5 | time_zone: UTC 6 | ttl: 1440 7 | json: true 8 | selectors: 9 | items: 10 | selector: "array > object" 11 | title: 12 | selector: "title" 13 | imdb_id: 14 | selector: "imdb_id" 15 | poster_url: 16 | selector: "poster_url" 17 | url: 18 | post_process: 19 | - name: template 20 | string: "https://imdb.com/title/%{imdb_id}" 21 | description: 22 | post_process: 23 | - name: template 24 | string: "![](%{poster_url})" 25 | - name: markdown_to_html 26 | -------------------------------------------------------------------------------- 
/lib/html2rss/configs/sebastianvettel.de/news.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://sebastianvettel.de/news/ 4 | language: de-DE 5 | ttl: 8640 6 | time_zone: Europe/Berlin 7 | selectors: 8 | items: 9 | selector: ".card.title_in_content" 10 | title: 11 | selector: h2 12 | description: 13 | selector: ".row-text-bild-modul" 14 | extractor: html 15 | -------------------------------------------------------------------------------- /lib/html2rss/configs/softwareleadweekly.com/issues.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://softwareleadweekly.com/issues 3 | time_zone: UTC 4 | ttl: 720 5 | selectors: 6 | items: 7 | selector: ".table-issue" 8 | title: 9 | selector: "a" 10 | url: 11 | selector: "a" 12 | extractor: "href" 13 | -------------------------------------------------------------------------------- /lib/html2rss/configs/solarthermalworld.org/news.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://solarthermalworld.org/news 3 | time_zone: UTC 4 | ttl: 180 5 | selectors: 6 | items: 7 | selector: "article.post-wrapper" 8 | title: 9 | selector: "h2 a" 10 | url: 11 | selector: "h2 a" 12 | extractor: "href" 13 | description: 14 | selector: '[property="content:encoded"]' 15 | extractor: html 16 | -------------------------------------------------------------------------------- /lib/html2rss/configs/spektrum.de/meistgelesen.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | title: "spektrum.de: meistgelesen" 3 | url: https://www.spektrum.de/ 4 | time_zone: Europe/Berlin 5 | ttl: 60 6 | language: de 7 | selectors: 8 | items: 9 | selector: ".teaser.teaser__list_box ol > li" 10 | title: 11 | selector: "a" 12 | url: 13 | selector: "a" 14 | extractor: "href" 15 | 
-------------------------------------------------------------------------------- /lib/html2rss/configs/spiegel.de/impressum_autor.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: "https://www.spiegel.de/impressum/autor-%s" 3 | time_zone: Europe/Berlin 4 | ttl: 180 5 | language: de 6 | selectors: 7 | items: 8 | selector: '[data-block-el="articleTeaser"] > article' 9 | title: 10 | selector: "h2" 11 | prefixed_title: 12 | selector: "h2" 13 | url: 14 | selector: "section > a" 15 | extractor: "href" 16 | description: 17 | selector: ".leading-loose" 18 | published_at: 19 | selector: "footer" 20 | extractor: text 21 | post_process: 22 | - name: substring 23 | start: 0 24 | end: -5 25 | - name: parse_time 26 | -------------------------------------------------------------------------------- /lib/html2rss/configs/stackoverflow.com/hot_network_questions.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | title: "stackoverflow.com: Hot Network Questions" 3 | url: https://stackoverflow.com/questions 4 | time_zone: America/New_York 5 | ttl: 30 6 | selectors: 7 | items: 8 | selector: "#hot-network-questions ul > li" 9 | title: 10 | selector: a 11 | url: 12 | selector: a 13 | extractor: href 14 | network: 15 | selector: ".favicon" 16 | extractor: attribute 17 | attribute: title 18 | categories: 19 | - network 20 | -------------------------------------------------------------------------------- /lib/html2rss/configs/steuerzahler.de/news.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.steuerzahler.de/news 3 | time_zone: Europe/Berlin 4 | ttl: 720 5 | language: de 6 | selectors: 7 | items: 8 | selector: ".bdst_presslist .bdst_presslist__block" 9 | title: 10 | selector: "h4" 11 | url: 12 | selector: "a" 13 | extractor: "href" 14 | description: 15 | selector: ".bdst_presslist__block__teaser" 16 | 
published_at: 17 | selector: ".bdst_presslist__block__date" 18 | post_process: 19 | - name: "parse_time" 20 | -------------------------------------------------------------------------------- /lib/html2rss/configs/stripes.com/index.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://www.stripes.com/ 4 | language: en 5 | ttl: 360 6 | time_zone: America/New_York 7 | selectors: 8 | items: 9 | selector: '[class$="teaser"]' 10 | title: 11 | selector: a 12 | url: 13 | selector: a 14 | extractor: href 15 | description: 16 | selector: img 17 | extractor: attribute 18 | attribute: alt 19 | -------------------------------------------------------------------------------- /lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://support.apple.com/en-gb/HT201222 4 | language: en 5 | ttl: 360 6 | time_zone: UTC 7 | selectors: 8 | items: 9 | selector: ".table-wrapper table tbody > tr:not(:first-child)" 10 | title: 11 | selector: a 12 | url: 13 | selector: a:first 14 | extractor: href 15 | description: 16 | selector: td:nth-child(2) 17 | published_at: 18 | selector: td:nth-child(3) 19 | post_process: 20 | - name: parse_time 21 | -------------------------------------------------------------------------------- /lib/html2rss/configs/support.apple.com/exchange_repair.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://support.apple.com/exchange_repair 3 | time_zone: America/Los_Angeles 4 | ttl: 720 5 | selectors: 6 | items: 7 | selector: ".main .as-columns--2up-extended" 8 | title: 9 | selector: "a" 10 | url: 11 | selector: "a" 12 | extractor: "href" 13 | img: 14 | selector: "img" 15 | extractor: "attribute" 16 | attribute: "src" 17 | published_at: 18 | selector: ".note" 19 | post_process: 20 | - name: parse_time 21 | description: 22 
| post_process: 23 | - name: template 24 | string: | 25 | # ![](%{img}) 26 | - name: markdown_to_html 27 | -------------------------------------------------------------------------------- /lib/html2rss/configs/teneriffa-news.com/news.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.teneriffa-news.com/news 3 | time_zone: "Europe/Lisbon" 4 | ttl: 720 5 | language: de 6 | selectors: 7 | items: 8 | selector: ".main-content article" 9 | title: 10 | selector: "h2" 11 | url: 12 | selector: "a" 13 | extractor: "href" 14 | img_url: 15 | selector: "img" 16 | extractor: "attribute" 17 | attribute: "data-lazy-src" 18 | description: 19 | selector: ".excerpt" 20 | post_process: 21 | - name: template 22 | string: | 23 | ![](%{img_url}) 24 | 25 | %{self} 26 | - name: markdown_to_html 27 | -------------------------------------------------------------------------------- /lib/html2rss/configs/test.de/archiv.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://www.test.de/archiv/ 4 | language: de 5 | ttl: 360 6 | time_zone: Europe/Berlin 7 | selectors: 8 | items: 9 | selector: ".archive__list > li" 10 | title: 11 | selector: a 12 | url: 13 | selector: a:first 14 | extractor: href 15 | description: 16 | selector: p 17 | -------------------------------------------------------------------------------- /lib/html2rss/configs/theguardian.com/international_mostpopular.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | title: "theguardian.com: International most popular" 3 | url: https://www.theguardian.com/international 4 | time_zone: Europe/London 5 | ttl: 60 6 | selectors: 7 | items: 8 | selector: '[data-link-name="most-viewed"] > ol > li' 9 | title: 10 | selector: "h4 span" 11 | url: 12 | selector: "a" 13 | extractor: "href" 14 | 
-------------------------------------------------------------------------------- /lib/html2rss/configs/thoughtworks.com/insights.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.thoughtworks.com/insights 3 | language: en 4 | ttl: 360 5 | time_zone: UTC 6 | selectors: 7 | items: 8 | selector: ".cmp-content-card" 9 | title: 10 | selector: ".cmp-content-card__text__para-text" 11 | url: 12 | selector: a:first 13 | extractor: href 14 | description: 15 | selector: p 16 | -------------------------------------------------------------------------------- /lib/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml: -------------------------------------------------------------------------------- 1 | channel: 2 | url: https://www.tourismusnetzwerk-brandenburg.de/nc/aktuelle-nachrichten/ 3 | time_zone: Europe/Berlin 4 | ttl: 720 5 | language: de 6 | selectors: 7 | items: 8 | selector: "article.article" 9 | title: 10 | selector: "h3" 11 | url: 12 | selector: "a" 13 | extractor: "href" 14 | topic: 15 | selector: ".field--item" 16 | categories: 17 | - topic 18 | description: 19 | selector: "p" 20 | -------------------------------------------------------------------------------- /lib/html2rss/configs/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Html2rss 4 | module Configs 5 | VERSION = '0.2.0' 6 | end 7 | end 8 | -------------------------------------------------------------------------------- /lib/html2rss/configs/webentwickler-jobs.de/in.yml: -------------------------------------------------------------------------------- 1 | --- 2 | channel: 3 | url: https://www.webentwickler-jobs.de/in/%s 4 | language: de 5 | ttl: 360 6 | time_zone: Europe/Berlin 7 | selectors: 8 | items: 9 | selector: ".job-item" 10 | title: 11 | selector: ".job-title" 12 | url: 13 | selector: a:first 14 | extractor: href 15 | 
description: 16 | selector: "> div" 17 | extractor: html 18 | -------------------------------------------------------------------------------- /spec/html2rss/configs/adfc.de/pressemitteilungen.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'adfc.de/pressemitteilungen.yml' do 4 | it_behaves_like 'config.yml', 'adfc.de/pressemitteilungen.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/apnews.com/hub.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'apnews.com/hub.yml' do 4 | it_behaves_like 'config.yml', 'apnews.com/hub.yml', section: 'world-news' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/avherald.com/index.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'avherald.com/index.yml' do 4 | it_behaves_like 'config.yml', 'avherald.com/index.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/bbc.co.uk/available_episodes.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'bbc.co.uk/available_episodes.yml' do 4 | it_behaves_like 'config.yml', 'bbc.co.uk/available_episodes.yml', id: 'b006wkfp' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/bbc.com/mundo.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'bbc.com/mundo.yml' do 4 | it_behaves_like 'config.yml', 'bbc.com/mundo.yml' 5 | end 6 | 
-------------------------------------------------------------------------------- /spec/html2rss/configs/canarianweekly.com/front.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'canarianweekly.com/front.yml' do 4 | it_behaves_like 'config.yml', 'canarianweekly.com/front.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/cinemascore.com/index.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'cinemascore.com/index.yml' do 4 | it_behaves_like 'config.yml', 'cinemascore.com/index.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/cleanenergywire.org/news.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'cleanenergywire.org/news.yml' do 4 | it_behaves_like 'config.yml', 'cleanenergywire.org/news.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/cnet.com/section_sub.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'cnet.com/section_sub.yml' do 4 | it_behaves_like 'config.yml', 'cnet.com/section_sub.yml', section: 'culture', sub: 'internet' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/computerbase.de/meistgelesen.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'computerbase.de/meistgelesen.yml' do 4 | it_behaves_like 'config.yml', 'computerbase.de/meistgelesen.yml' 5 | end 6 | 
-------------------------------------------------------------------------------- /spec/html2rss/configs/cutle.fish/index.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'cutle.fish/index.yml' do 4 | it_behaves_like 'config.yml', 'cutle.fish/index.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/deraktionaer.de/meistgelesen.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'deraktionaer.de/meistgelesen.yml' do 4 | it_behaves_like 'config.yml', 'deraktionaer.de/meistgelesen.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'developer.apple.com/tutorials_data_documentation_technotes_json.yml' do 4 | it_behaves_like 'config.yml', 'developer.apple.com/tutorials_data_documentation_technotes_json.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/dfs.de/pressemitteilungen.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'dfs.de/pressemitteilungen.yml' do 4 | it_behaves_like 'config.yml', 'dfs.de/pressemitteilungen.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/dsw-info.de/presse.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'dsw-info.de/presse.yml' do 4 | it_behaves_like
'config.yml', 'dsw-info.de/presse.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/espn.com/f1.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'espn.com/f1.yml' do 4 | it_behaves_like 'config.yml', 'espn.com/f1.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/fia.com/documents.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'fia.com/documents.yml' do 4 | it_behaves_like 'config.yml', 'fia.com/documents.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/formula1.com/latest.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'formula1.com/latest.yml' do 4 | it_behaves_like 'config.yml', 'formula1.com/latest.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/github.com/releases.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'github.com/releases.yml' do # trailing hash is passed to the shared examples as `params` 4 | it_behaves_like 'config.yml', 'github.com/releases.yml', username: 'nuxt', repository: 'nuxt.js' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/iaapa.org/news.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'iaapa.org/news.yml' do 4 | it_behaves_like 'config.yml', 'iaapa.org/news.yml' 5 | end 6 |
-------------------------------------------------------------------------------- /spec/html2rss/configs/imdb.com/ratings.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'imdb.com/ratings.yml' do # user_id is passed to the shared examples as `params` 4 | it_behaves_like 'config.yml', 'imdb.com/ratings.yml', user_id: 'ur7019649' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'ingenieur.de/karriere_arbeitsleben_heiko_mell.yml' do 4 | it_behaves_like 'config.yml', 'ingenieur.de/karriere_arbeitsleben_heiko_mell.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/kinocheck.de/filmstarts.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'kinocheck.de/filmstarts.yml' do 4 | it_behaves_like 'config.yml', 'kinocheck.de/filmstarts.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/newyorker.com/magazine.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'newyorker.com/magazine.yml' do 4 | it_behaves_like 'config.yml', 'newyorker.com/magazine.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/nomanssky.com/news.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'nomanssky.com/news.yml' do 4 | it_behaves_like 'config.yml', 'nomanssky.com/news.yml' 5 | end 6 |
-------------------------------------------------------------------------------- /spec/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'pankow.lebensmittel-kontrollergebnisse.de/search.yml' do 4 | it_behaves_like 'config.yml', 'pankow.lebensmittel-kontrollergebnisse.de/search.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/phys.org/weekly.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'phys.org/weekly.yml' do 4 | it_behaves_like 'config.yml', 'phys.org/weekly.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/rbb24.de/meistgeklickt.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'rbb24.de/meistgeklickt.yml' do 4 | it_behaves_like 'config.yml', 'rbb24.de/meistgeklickt.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/robinwood.de/aktuelles.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'robinwood.de/aktuelles.yml' do 4 | it_behaves_like 'config.yml', 'robinwood.de/aktuelles.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/s3.amazonaws.com/popular_movies.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 's3.amaoznaws.com/popular_movies.yml' do 4 | it_behaves_like 'config.yml', 's3.amazonaws.com/popular_movies.yml' 5 
| end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/sebastianvettel.de/news.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'sebastianvettel.de/news.yml' do 4 | it_behaves_like 'config.yml', 'sebastianvettel.de/news.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/softwareleadweekly.com/issues.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'softwareleadweekly.com/issues.yml' do 4 | it_behaves_like 'config.yml', 'softwareleadweekly.com/issues.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/solarthermalworld.org/news.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'solarthermalworld.org/news.yml' do 4 | it_behaves_like 'config.yml', 'solarthermalworld.org/news.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/spektrum.de/meistgelesen.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'spektrum.de/meistgelesen.yml' do 4 | it_behaves_like 'config.yml', 'spektrum.de/meistgelesen.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/spiegel.de/impressum_autor.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'spiegel.de/impressum_autor.yml' do # id is passed to the shared examples as `params` 4 | it_behaves_like 'config.yml', 'spiegel.de/impressum_autor.yml', id:
'975b6ae0-0001-0003-0000-000000018282' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/stackoverflow.com/hot_network_questions.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'stackoverflow.com/hot_network_questions.yml' do 4 | it_behaves_like 'config.yml', 'stackoverflow.com/hot_network_questions.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/steuerzahler.de/news.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'steuerzahler.de/news.yml' do 4 | it_behaves_like 'config.yml', 'steuerzahler.de/news.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/stripes.com/index.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'stripes.com/index.yml' do 4 | it_behaves_like 'config.yml', 'stripes.com/index.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/support.apple.com/en_gb_ht201222.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'support.apple.com/en_gb_ht201222.yml' do 4 | it_behaves_like 'config.yml', 'support.apple.com/en_gb_ht201222.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/support.apple.com/exchange_repair.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'support.apple.com/exchange_repair.yml' do 4 |
it_behaves_like 'config.yml', 'support.apple.com/exchange_repair.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/teneriffa-news.com/news.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'teneriffa-news.com/news.yml' do 4 | it_behaves_like 'config.yml', 'teneriffa-news.com/news.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/test.de/archiv.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'test.de/archiv.yml' do 4 | it_behaves_like 'config.yml', 'test.de/archiv.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/theguardian.com/international_mostpopular.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'theguardian.com/international_mostpopular.yml' do 4 | it_behaves_like 'config.yml', 'theguardian.com/international_mostpopular.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/thoughtworks.com/insights.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'thoughtworks.com/insights.yml' do 4 | it_behaves_like 'config.yml', 'thoughtworks.com/insights.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe
'tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml' do 4 | it_behaves_like 'config.yml', 'tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs/webentwickler-jobs.de/in.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe 'webentwickler-jobs.de/in.yml' do 4 | it_behaves_like 'config.yml', 'webentwickler-jobs.de/in.yml', region: 'berlin' 5 | end 6 | -------------------------------------------------------------------------------- /spec/html2rss/configs_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe Html2rss::Configs do 4 | it 'has a version number' do 5 | expect(Html2rss::Configs::VERSION).not_to be_nil 6 | end 7 | 8 | describe '.file_names' do 9 | subject(:file_names) { described_class.file_names } 10 | 11 | specify(:aggregate_failures) do 12 | expect(file_names).to be_an(Array) & be_frozen 13 | end 14 | end 15 | 16 | describe '.find_by_name' do 17 | context 'with valid name' do 18 | subject(:find_by_name) { described_class.find_by_name('adfc.de/pressemitteilungen') } 19 | 20 | specify(:aggregate_failures) do 21 | expect(find_by_name).to be_a(Hash) & be_frozen 22 | end 23 | end 24 | 25 | context 'with name not being a String' do 26 | it 'raises ConfigNotFound error' do 27 | expect { described_class.find_by_name(:foobar) }.to raise_error(RuntimeError) 28 | end 29 | end 30 | 31 | context 'with name not not containing a folder' do 32 | it 'raises ConfigNotFound error' do 33 | expect { described_class.find_by_name('foobar') }.to raise_error(RuntimeError) 34 | end 35 | end 36 | 37 | context 'with inexistent config' do 38 | it 'raises ConfigNotFound error' do 39 | expect { described_class.find_by_name('foobar/baz') }.to raise_error( 40 | 
Html2rss::Configs::ConfigNotFound 41 | ) 42 | end 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'bundler/setup' 4 | require 'tzinfo' 5 | require 'html2rss' 6 | require 'html2rss/configs' 7 | 8 | Dir['./spec/support/**/*.rb'].each { |f| require f } 9 | 10 | Zeitwerk::Loader.eager_load_all # eagerly load all Zeitwerk-managed code 11 | 12 | RSpec.configure do |config| 13 | # Enable flags like --only-failures and --next-failure 14 | config.example_status_persistence_file_path = '.rspec_status' 15 | 16 | # Disable RSpec exposing methods globally on `Module` and `main` 17 | config.disable_monkey_patching! 18 | config.filter_run_excluding fetch: true # exclude examples tagged :fetch 19 | 20 | config.expect_with :rspec do |c| 21 | c.syntax = :expect 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /spec/support/helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'json' 4 | require 'nokogiri' 5 | require 'yaml' 6 | require 'uri' 7 | 8 | ## 9 | # A collection of helper methods. 10 | module Helper 11 | ## 12 | # @param url [String] URL; only the first three '/'-separated parts (scheme and host) are parsed 13 | # @return [String] the host without a leading 'api.', 'www.' or 'webapp.' 14 | def self.url_to_directory_name(url) 15 | URI(url.split('/')[0..2].join('/')).host.gsub(/^(api|www|webapp)\./, '') 16 | end 17 | 18 | ## 19 | # Determines which selectors are referenced in the template of a Html2rss::PostProcessors::Template.
20 | # 21 | # @param selectors [Hash] the 'selectors hash' 22 | # @return [Array] 23 | def self.referenced_selectors_in_template(selectors) 24 | selectors.each_value.flat_map do |selector_hash| 25 | if selector_hash.is_a?(Hash) 26 | post_processor_hashes(selector_hash['post_process'], 'template').flat_map do |template| 27 | string_formatting_references(template['string']).keys 28 | end 29 | end 30 | end 31 | .compact 32 | end 33 | 34 | ## 35 | # 36 | # @param post_processors [Hash, Array] 37 | # @param keep_name [String] 38 | # @return [Array] containing only the hashes stored under the 'keep_name' 39 | def self.post_processor_hashes(post_processors, keep_name) 40 | [post_processors].flatten.compact.keep_if { |processor| processor['name'] == keep_name } 41 | end 42 | 43 | ## 44 | # Determines the referenced values of "more complex string formatting". 45 | # return [Hash] the keys with their type 46 | def self.string_formatting_references(string) 47 | string.to_s.scan(/%[{<](\w+)[>}](\w)?/).to_h.transform_values do |value| 48 | case value 49 | when 'i', 'd', 'u' 50 | Numeric 51 | else 52 | String 53 | end 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /spec/support/shared_examples/config.yml_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'yaml' 4 | 5 | RSpec.shared_examples 'config.yml' do |file_name, params| 6 | subject(:yaml) { YAML.safe_load_file(file_path) } 7 | 8 | let!(:file_path) do 9 | File.expand_path(File.join(__dir__, '..', '..', '..', 'lib', 'html2rss', 'configs', file_name)) 10 | end 11 | 12 | let(:global_config) do 13 | { 14 | 'headers' => { 15 | 'User-Agent': <<~UA.delete("\n") 16 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) 17 | AppleWebKit/537.36 (KHTML, like Gecko) 18 | Chrome/134.0.0.0 19 | Safari/537.36' 20 | UA 21 | } 22 | } 23 | end 24 | let(:config) do 25 | feed_name = 
file_path.split(File::Separator)[-2..].join(File::Separator) 26 | config = {}.merge Html2rss::Configs.find_by_name(feed_name) # fresh mutable copy; merge! follows 27 | 28 | config.merge!(global_config.dup) 29 | config[:params] = params if params 30 | config 31 | end 32 | 33 | context 'with the file' do 34 | let(:host_name) { Helper.url_to_directory_name yaml['channel']['url'] } 35 | let(:dirname) { File.dirname(file_path).split(File::Separator).last } 36 | 37 | it 'is parseable' do 38 | expect { yaml }.not_to raise_error 39 | end 40 | 41 | it "resides in a folder named after channel.url's host" do 42 | expect(dirname).to eq(host_name) 43 | end 44 | end 45 | 46 | context 'with file contents' do 47 | it 'has channel and selectors', :aggregate_failures do 48 | expect(yaml).to have_key 'channel' 49 | expect(yaml).to have_key 'selectors' 50 | end 51 | 52 | context 'with channel present' do 53 | it 'has channel required attributes', :aggregate_failures do 54 | %w[url ttl time_zone].each do |required_attribute| 55 | expect(yaml['channel']).to(have_key(required_attribute), required_attribute) 56 | end 57 | end 58 | 59 | it 'has a known time_zone' do 60 | expect { TZInfo::Timezone.get yaml['channel']['time_zone'] }.not_to raise_error 61 | end 62 | end 63 | 64 | context 'with selectors present' do 65 | it 'has required selectors', :aggregate_failures do 66 | %w[items title].each do |required_attribute| 67 | expect(yaml['selectors'][required_attribute]).not_to(be_empty, required_attribute) 68 | end 69 | end 70 | 71 | context 'with template post_processor' do 72 | it 'references available selectors only', :aggregate_failures do 73 | Helper.referenced_selectors_in_template(yaml['selectors']).each do |referenced_selector| 74 | next if referenced_selector == 'self' # NOTE(review): 'self' presumably isn't a selector name - confirm 75 | 76 | expect(yaml['selectors'][referenced_selector]) 77 | .not_to be_nil, "selector `#{referenced_selector}` referenced, but is missing" 78 | end 79 | end 80 | end 81 | 82 | context 'with categories' do 83 | it 'references available selectors only',
:aggregate_failures do 84 | yaml['selectors'].fetch('categories', []).each do |selector_name| 85 | expect(yaml['selectors'][selector_name]) 86 | .not_to be_nil, "categories references `#{selector_name}`, but is missing" 87 | end 88 | end 89 | end 90 | end 91 | end 92 | 93 | context "when fetching #{params}", :fetch do # excluded by spec_helper's `filter_run_excluding fetch: true` 94 | subject(:feed) { Html2rss.feed(config.dup) } 95 | 96 | it 'has positive amount of items' do 97 | expect(feed.items.count).to be_positive, <<~MSG 98 | No items fetched. 99 | Check the feed URL and selectors in `#{file_name}`. 100 | 101 | # #{file_name} 102 | #{config} 103 | 104 | # resulted in RSS: 105 | #{feed} 106 | MSG 107 | end 108 | end 109 | 110 | context "when fetching #{params} / item", :fetch do 111 | subject(:item) do 112 | items = Html2rss.feed(config.dup).items 113 | 114 | expect(items.count).not_to be_zero, "Zero items fetched for `#{file_name}`" 115 | 116 | items.shift 117 | end 118 | 119 | let(:specified_attributes) { Html2rss::Selectors::ITEM_TAGS & %w[title description author category] } 120 | let(:text_attributes) { specified_attributes & %w[title description author] } 121 | 122 | it 'has no empty text attributes', :aggregate_failures do # NOTE(review): no-op if ITEM_TAGS holds Symbols; confirm 123 | text_attributes.each do |attribute_name| 124 | expect(item.public_send(attribute_name).to_s).not_to be_empty, attribute_name.to_s 125 | end 126 | end 127 | 128 | it 'has no empty content attributes', :aggregate_failures do 129 | (specified_attributes - text_attributes).each do |attribute_name| 130 | expect(item.public_send(attribute_name).content).not_to be_empty, attribute_name.to_s 131 | end 132 | end 133 | 134 | it 'has link content beginning with "http" when config has a link selector' do 135 | expect(item&.link&.to_s).to start_with('http') if Html2rss::Selectors::ITEM_TAGS.include?(:url) 136 | end 137 | end 138 | end 139 | --------------------------------------------------------------------------------