├── .github └── workflows │ └── crawl-rss.yml ├── .gitignore ├── Logs ├── 20250527_160454-crawler.txt ├── 20250527_200409-crawler.txt ├── 20250528_001459-crawler.txt ├── 20250528_040605-crawler.txt ├── 20250528_080457-crawler.txt ├── 20250528_120604-crawler.txt ├── 20250528_160421-crawler.txt ├── 20250528_200417-crawler.txt ├── 20250529_001455-crawler.txt ├── 20250529_040636-crawler.txt ├── 20250529_080456-crawler.txt ├── 20250529_120558-crawler.txt ├── 20250529_160421-crawler.txt ├── 20250529_200424-crawler.txt ├── 20250530_001516-crawler.txt ├── 20250530_040617-crawler.txt ├── 20250530_080517-crawler.txt ├── 20250530_120612-crawler.txt ├── 20250530_160429-crawler.txt ├── 20250530_200414-crawler.txt ├── 20250531_001443-crawler.txt ├── 20250531_040515-crawler.txt ├── 20250531_080421-crawler.txt ├── 20250531_120540-crawler.txt ├── 20250531_160353-crawler.txt ├── 20250531_200351-crawler.txt ├── 20250601_001846-crawler.txt ├── 20250601_041539-crawler.txt ├── 20250601_080428-crawler.txt ├── 20250601_120520-crawler.txt ├── 20250601_160402-crawler.txt ├── 20250601_200342-crawler.txt ├── 20250602_001628-crawler.txt ├── 20250602_041025-crawler.txt ├── 20250602_080539-crawler.txt ├── 20250602_120604-crawler.txt ├── 20250602_160433-crawler.txt ├── 20250602_200435-crawler.txt ├── 20250603_001503-crawler.txt ├── 20250603_040811-crawler.txt ├── 20250603_080534-crawler.txt ├── 20250603_120614-crawler.txt ├── 20250604_001526-crawler.txt ├── 20250604_040803-crawler.txt ├── 20250604_080521-crawler.txt ├── 20250604_120616-crawler.txt ├── 20250604_160457-crawler.txt ├── 20250604_200410-crawler.txt ├── 20250605_001505-crawler.txt ├── 20250605_040840-crawler.txt ├── 20250605_080548-crawler.txt ├── 20250605_120642-crawler.txt ├── 20250605_160429-crawler.txt ├── 20250605_200342-crawler.txt ├── 20250606_001531-crawler.txt ├── 20250606_040812-crawler.txt ├── 20250606_080522-crawler.txt └── 20250606_120613-crawler.txt ├── README.md ├── RssCrawler.db ├── RssCrawler ├── Models │ ├── 
BlacklistRow.cs │ ├── RssChannelRow.cs │ └── RssFeedItemRow.cs ├── MyLogger.cs ├── Program.cs ├── RssCrawler.cs ├── RssCrawler.csproj ├── RssCrawler.sln ├── RssCrawlerEngine.cs ├── SimpleFeedlyDatabaseAccess.cs └── Utils │ ├── EnvironmentHelper.cs │ ├── FileUtils.cs │ └── StringUtils.cs ├── images └── demo.png ├── index.html └── site.js /.github/workflows/crawl-rss.yml: -------------------------------------------------------------------------------- 1 | on: 2 | schedule: 3 | # Runs every 4h 4 | - cron: '0 */4 * * *' 5 | workflow_dispatch: 6 | 7 | jobs: 8 | update-readme-with-blog: 9 | name: Crawl rss and generate static page 10 | runs-on: windows-2019 11 | steps: 12 | - uses: actions/checkout@main 13 | with: 14 | repository: minhhungit/github-action-rss-crawler 15 | token: ${{ secrets.GITHUB_TOKEN }} 16 | - uses: actions/setup-dotnet@v1 17 | with: 18 | dotnet-version: 3.1.x 19 | #- run: dotnet build DemoApp\DemoApp.sln 20 | - run: dotnet run --project RssCrawler\RssCrawler.csproj 21 | - run: git config --local user.email "it.minhhung@gmail.com" 22 | - run: git config --local user.name "Jin" 23 | - run: git add . 24 | - run: git commit -m "Add changes" 25 | - run: git push 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # This .gitignore file was automatically created by Microsoft(R) Visual Studio. 
3 | ################################################################################ 4 | 5 | /RssCrawler/.vs/RssCrawler/v16 6 | /RssCrawler/bin/Debug/netcoreapp3.1 7 | /RssCrawler/obj 8 | /RssCrawler/.vs/RssCrawler/DesignTimeBuild/.dtbcache.v2 9 | -------------------------------------------------------------------------------- /Logs/20250527_160454-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250527_200409-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250528_001459-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250528_040605-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250528_080457-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 10 14 | INFO - Deleted old items 15 | INFO - Inserted 5 items 16 | INFO - Updated status 17 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 18 | INFO - Nbr of feed items 144 19 | INFO - Deleted old items 20 | INFO - Inserted 5 items 21 | INFO - Updated status 22 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 23 | INFO - Nbr of feed items 25 24 | INFO - Deleted old items 25 | INFO - Inserted 5 items 26 | INFO - Updated status 27 | INFO Done! 
28 | -------------------------------------------------------------------------------- /Logs/20250528_120604-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250528_160421-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250528_200417-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250529_001455-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 10 14 | INFO - Deleted old items 15 | INFO - Inserted 5 items 16 | INFO - Updated status 17 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 18 | INFO - Nbr of feed items 144 19 | INFO - Deleted old items 20 | INFO - Inserted 5 items 21 | INFO - Updated status 22 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 23 | INFO - Nbr of feed items 25 24 | INFO - Deleted old items 25 | INFO - Inserted 5 items 26 | INFO - Updated status 27 | INFO Done! 
28 | -------------------------------------------------------------------------------- /Logs/20250529_040636-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250529_080456-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250529_120558-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250529_160421-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250529_200424-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250530_001516-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 10 14 | INFO - Deleted old items 15 | INFO - Inserted 5 items 16 | INFO - Updated status 17 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 18 | INFO - Nbr of feed items 144 19 | INFO - Deleted old items 20 | INFO - Inserted 5 items 21 | INFO - Updated status 22 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 23 | INFO - Nbr of feed items 25 24 | INFO - Deleted old items 25 | INFO - Inserted 5 items 26 | INFO - Updated status 27 | INFO Done! 
28 | -------------------------------------------------------------------------------- /Logs/20250530_040617-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250530_080517-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250530_120612-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250530_160429-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250530_200414-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250531_001443-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250531_040515-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250531_080421-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250531_120540-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250531_160353-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250531_200351-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250601_001846-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250601_041539-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250601_080428-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250601_120520-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250601_160402-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250601_200342-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250602_001628-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250602_041025-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250602_080539-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250602_120604-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 10 14 | INFO - Deleted old items 15 | INFO - Inserted 5 items 16 | INFO - Updated status 17 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 18 | INFO - Nbr of feed items 144 19 | INFO - Deleted old items 20 | INFO - Inserted 5 items 21 | INFO - Updated status 22 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 23 | INFO - Nbr of feed items 25 24 | INFO - Deleted old items 25 | INFO - Inserted 5 items 26 | INFO - Updated status 27 | INFO Done! 
28 | -------------------------------------------------------------------------------- /Logs/20250602_160433-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250602_200435-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250603_001503-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250603_040811-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250603_080534-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250603_120614-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250604_001526-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250604_040803-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250604_080521-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250604_120616-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250604_160457-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250604_200410-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250605_001505-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250605_040840-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250605_080548-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250605_120642-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250605_160429-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250605_200342-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250606_001531-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250606_040812-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 10 14 | INFO - Deleted old items 15 | INFO - Inserted 5 items 16 | INFO - Updated status 17 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 18 | INFO - Nbr of feed items 144 19 | INFO - Deleted old items 20 | INFO - Inserted 5 items 21 | INFO - Updated status 22 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 23 | INFO - Nbr of feed items 25 24 | INFO - Deleted old items 25 | INFO - Inserted 5 items 26 | INFO - Updated status 27 | INFO Done! 
28 | -------------------------------------------------------------------------------- /Logs/20250606_080522-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /Logs/20250606_120613-crawler.txt: -------------------------------------------------------------------------------- 1 | INFO There are 309 active channels 2 | INFO - [1/5] Fetching url https://damienbod.com/feed 3 | INFO - Nbr of feed items 10 4 | INFO - Deleted old items 5 | INFO - Inserted 5 items 6 | INFO - Updated status 7 | INFO - [2/5] Fetching url https://vuejsfeed.com/feed 8 | INFO - Nbr of feed items 752 9 | INFO - Deleted old items 10 | INFO - Inserted 5 items 11 | INFO - Updated status 12 | INFO - [3/5] Fetching url https://www.stevejgordon.co.uk/feed 13 | INFO - Nbr of feed items 0 14 | INFO - [4/5] Fetching url http://blog.jonathanoliver.com/index.xml 15 | INFO - Nbr of feed items 144 16 | INFO - Deleted old items 17 | INFO - Inserted 5 items 18 | INFO - Updated status 19 | INFO - [5/5] Fetching url http://feeds.feedburner.com/codeclimber 20 | INFO - Nbr of feed items 25 21 | INFO - Deleted old items 22 | INFO - Inserted 5 items 23 | INFO - Updated status 24 | INFO Done! 
25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## RSS auto-crawling using GitHub Actions 2 | 3 | **GitHub Actions performs all of these steps automatically; it runs the RSS crawler every 4 hours** 4 | 5 | Steps: 6 | - GitHub pulls the repository, then builds and runs the crawler code (the crawler is written in C# on .NET Core, and GitHub runs it directly) 7 | - Read channel URLs from LiteDB 8 | - Fetch RSS feed items 9 | - Insert feed items into LiteDB after checking them against the blacklist and the items that already exist 10 | - Render all RSS items into a static page (index.html - https://minhhungit.github.io/github-action-rss-crawler/ ) 11 | - Commit the changes (LiteDB database & index.html page) and push them to this repo 12 | 13 | 14 | ### Workflow 15 | ```yml 16 | on: 17 | schedule: 18 | # Runs every 4h 19 | - cron: '0 */4 * * *' 20 | workflow_dispatch: 21 | 22 | jobs: 23 | update-readme-with-blog: 24 | name: Crawl rss and generate static page 25 | runs-on: windows-2019 26 | steps: 27 | - uses: actions/checkout@main 28 | with: 29 | repository: minhhungit/github-action-rss-crawler 30 | token: ${{ secrets.GITHUB_TOKEN }} 31 | - uses: actions/setup-dotnet@v1 32 | with: 33 | dotnet-version: 3.1.x 34 | #- run: dotnet build DemoApp\DemoApp.sln 35 | - run: dotnet run --project RssCrawler\RssCrawler.csproj 36 | - run: git config --local user.email "it.minhhung@gmail.com" 37 | - run: git config --local user.name "Jin" 38 | - run: git add . 
39 | - run: git commit -m "Add changes" 40 | - run: git push 41 | ``` 42 | 43 | --- 44 | 45 | ### Demo 46 | 47 | > https://minhhungit.github.io/github-action-rss-crawler/ 48 | 49 | 50 | 51 | 52 | ### Donate ^^ 53 | **If you like my work and would like to support me, you can buy me a coffee ☕️ anytime** 54 | 55 | Buy Me a Coffee at ko-fi.com 56 | 57 | **I would appreciate it!!!** 58 | -------------------------------------------------------------------------------- /RssCrawler.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minhhungit/github-action-rss-crawler/2f08d4a76dd50793b16a8b038c66058ef6385d1a/RssCrawler.db -------------------------------------------------------------------------------- /RssCrawler/Models/BlacklistRow.cs: -------------------------------------------------------------------------------- 1 | using LiteDB; 2 | 3 | namespace RssCrawler.Models 4 | { 5 | public class BlacklistRow 6 | { 7 | public ObjectId Id { get; set; } 8 | public string ShrinkedTitle { get; set; } 9 | public string ShrinkedTitleHash { get; set; } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /RssCrawler/Models/RssChannelRow.cs: -------------------------------------------------------------------------------- 1 | using LiteDB; 2 | using System; 3 | 4 | namespace RssCrawler.Models 5 | { 6 | public class RssChannelRow 7 | { 8 | public ObjectId Id { get; set; } 9 | public int Type { get; set; } 10 | public string DomainGroup { get; set; } 11 | public string Title { get; set; } 12 | public string Link { get; set; } 13 | public string Description { get; set; } 14 | public string Language { get; set; } 15 | public string Copyright { get; set; } 16 | public DateTime LastUpdatedDate { get; set; } 17 | public string ImageUrl { get; set; } 18 | public string OriginalDocument { get; set; } 19 | public bool IsError { get; set; } 20 | public string ErrorMessage { get; 
set; } 21 | public int IsActive { get; set; } 22 | public RssCrawlerEngine RssCrawlerEngine { get; set; } 23 | public int? RefreshTimeMinutes { get; set; } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /RssCrawler/Models/RssFeedItemRow.cs: -------------------------------------------------------------------------------- 1 | using LiteDB; 2 | using System; 3 | 4 | namespace RssCrawler.Models 5 | { 6 | public class RssFeedItemRow 7 | { 8 | public ObjectId Id { get; set; } 9 | 10 | [BsonRef("channels")] 11 | public RssChannelRow Channel { get; set; } 12 | public string FeedItemKey { get; set; } 13 | public string Title { get; set; } 14 | public string Link { get; set; } 15 | public string Description { get; set; } 16 | public DateTime PublishingDate { get; set; } 17 | public string Author { get; set; } 18 | public string Content { get; set; } 19 | public bool IsChecked { get; set; } 20 | 21 | public string CoverImageUrl { get; set; } 22 | public string XmlData { get; set; } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /RssCrawler/MyLogger.cs: -------------------------------------------------------------------------------- 1 | using RssCrawler.Utils; 2 | using System.IO; 3 | 4 | namespace RssCrawler 5 | { 6 | public class MyLogger 7 | { 8 | private string FilePath { get; set; } 9 | 10 | public MyLogger(string path) 11 | { 12 | this.FilePath = path; 13 | } 14 | 15 | public void Info(string text) 16 | { 17 | FileUtils.WriteText(this.FilePath, $"INFO\t{text}"); 18 | } 19 | 20 | public void Error(string text) 21 | { 22 | FileUtils.WriteText(this.FilePath, $"ERROR\t{text}"); 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /RssCrawler/Program.cs: -------------------------------------------------------------------------------- 1 | using LiteDB; 2 | using RssCrawler.Utils; 3 | using 
System.Collections.Specialized; 4 | using System.IO; 5 | using System.Linq; 6 | using System.Text; 7 | using System.Text.RegularExpressions; 8 | 9 | namespace RssCrawler 10 | { 11 | class Program 12 | { 13 | static void Main(string[] args) 14 | { 15 | var appRootPath = EnvironmentHelper.GetApplicationRoot(); 16 | //WriteAllText(Path.Combine(appRootPath, "../", "README.md"), "Hello From Jin -- this file is auto committed " + DateTime.Now); 17 | 18 | SimpleFeedlyDatabaseAccess.Shrink(); 19 | RssCrawler.CrawlRss(); 20 | 21 | string indexFilePath = Path.Combine(appRootPath, "../", "index.html"); 22 | string indexContent = File.ReadAllText(indexFilePath, Encoding.UTF8); 23 | 24 | //var regex = new Regex(@"[\n\r]+(.*?)[\n\r]+"); 25 | //var match = regex.Match(indexContent); 26 | //var result = match.Groups[1].Value; 27 | 28 | var change = string.Empty; //$"Hello, this text is auto generated {DateTime.Now:yyy/MM/dd HH:mm:ss}"; 29 | 30 | var feedItems = SimpleFeedlyDatabaseAccess.GetAllFeedItems(); 31 | feedItems = feedItems.Where(x => !x.Channel.Title.Contains("medium", System.StringComparison.CurrentCultureIgnoreCase)).ToList(); // I hate medium rss 32 | 33 | ObjectId currentChannelId = null; 34 | 35 | var sb = new StringBuilder(); 36 | var sbChannel = new StringBuilder(); 37 | 38 | var counter = 1; 39 | foreach (var item in feedItems) 40 | { 41 | var isNewChannel = item.Channel.Id != currentChannelId; 42 | currentChannelId = item.Channel.Id; 43 | 44 | // just get top 20 feed items 45 | if (counter > 20 && !isNewChannel) 46 | { 47 | counter++; 48 | continue; 49 | } 50 | 51 | if (isNewChannel) 52 | { 53 | counter = 1; 54 | if (sbChannel.Length > 0) // has previous item 55 | { 56 | sbChannel.AppendLine(""); // div.row 57 | sb.Append(sbChannel); 58 | sbChannel = new StringBuilder(); 59 | } 60 | 61 | sbChannel.AppendLine($"

# {item.Channel.Title}

"); 62 | sbChannel.AppendLine(""); // div.row 78 | 79 | change = sb.ToString(); 80 | 81 | //var newContent = Regex.Replace(indexContent, $"[\n\r]+(.*?)[\n\r]+", string.Format("\n{0}\n", change)); 82 | var newContent = Regex.Replace(indexContent, $"(?:[^\n]*(\n+))+", string.Format("\n{0}\n", change)); 83 | WriteAllText(indexFilePath, newContent); 84 | 85 | //string assemblyFolder = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); 86 | //string crawlLogFolderPath= Path.Combine(assemblyFolder, "Logs"); 87 | 88 | //if (!Directory.Exists(crawlLogFolderPath)) 89 | //{ 90 | // Directory.CreateDirectory(crawlLogFolderPath); 91 | //} 92 | //string[] filePaths = Directory.GetFiles(crawlLogFolderPath); 93 | //foreach (var filename in filePaths) 94 | //{ 95 | // string targetFolderPath = Path.Combine(appRootPath, "..\\Logs\\"); 96 | // if (!Directory.Exists(targetFolderPath)) 97 | // { 98 | // Directory.CreateDirectory(targetFolderPath); 99 | // } 100 | 101 | // var targetFilePath = Path.Combine(targetFolderPath, Path.GetFileName(filename)); 102 | // File.Copy(filename, targetFilePath, true); 103 | //} 104 | } 105 | 106 | static void WriteAllText(string path, string txt) 107 | { 108 | var bytes = Encoding.UTF8.GetBytes(txt); 109 | using (var f = File.Open(path, FileMode.Create)) 110 | { 111 | f.Write(bytes, 0, bytes.Length); 112 | } 113 | } 114 | } 115 | } -------------------------------------------------------------------------------- /RssCrawler/RssCrawler.cs: -------------------------------------------------------------------------------- 1 | using HtmlAgilityPack; 2 | using LiteDB; 3 | using Newtonsoft.Json; 4 | using RssCrawler.Models; 5 | using RssCrawler.Utils; 6 | using System; 7 | using System.Collections.Generic; 8 | using System.Globalization; 9 | using System.IO; 10 | using System.Linq; 11 | using System.Net; 12 | using System.Text; 13 | using System.Text.RegularExpressions; 14 | using System.Xml; 15 | 16 | namespace RssCrawler 17 | { 18 | public 
class RssCrawler 19 | { 20 | const string LogDateTimeFormat = "yyyyMMdd_HHmmss"; 21 | 22 | public static void CrawlRss() 23 | { 24 | var logFolder = $"{Path.Combine(EnvironmentHelper.GetApplicationRoot(), "../Logs/")}"; 25 | if (!Directory.Exists(logFolder)) 26 | { 27 | Directory.CreateDirectory(logFolder); 28 | } 29 | 30 | var existedLogFiles = Directory.GetFiles(logFolder); 31 | 32 | var logFileBank = new Dictionary(); 33 | foreach (var file in existedLogFiles) 34 | { 35 | var dtFormatArr = Path.GetFileNameWithoutExtension(file).Split('-'); 36 | if (dtFormatArr.Length > 0) 37 | { 38 | var dtText = dtFormatArr[0]; 39 | if (DateTime.TryParseExact(dtText, LogDateTimeFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime dt)) 40 | { 41 | logFileBank.Add(dt, file); 42 | } 43 | } 44 | } 45 | 46 | foreach (var kv in logFileBank) 47 | { 48 | if (kv.Key < DateTime.Now.AddDays(-10)) // just keep 10 days nearest 49 | { 50 | File.Delete(kv.Value); 51 | } 52 | } 53 | 54 | var logPath = $"{Path.Combine(logFolder, $"{DateTime.Now.ToString(LogDateTimeFormat)}-crawler.txt")}"; 55 | 56 | var _logger = new MyLogger(logPath); 57 | 58 | var feedUrl = string.Empty; 59 | try 60 | { 61 | List channels = channels = SimpleFeedlyDatabaseAccess.GetActiveChannels().OrderBy(x => x.Id).ToList(); 62 | 63 | _logger.Info($"There are {channels.Count} active channels"); 64 | 65 | // just take top 5 channel 66 | // purpose of this repository is demo how to run dotnet core app on github action, not rss crawl 67 | channels = channels.Take(5).ToList(); 68 | 69 | var progressCounter = 0; 70 | foreach (var channel in channels) 71 | { 72 | progressCounter++; 73 | 74 | feedUrl = channel.Link; 75 | 76 | if (string.IsNullOrWhiteSpace(feedUrl)) 77 | { 78 | continue; 79 | } 80 | 81 | try 82 | { 83 | _logger.Info($"- [{progressCounter}/{channels.Count}] Fetching url {feedUrl}"); 84 | var feed = GetFeedsFromChannel(feedUrl, channel.RssCrawlerEngine, out RssCrawlerEngine usedEngine, out Exception 
fetchFeedError); 85 | 86 | _logger.Info($" - Nbr of feed items {feed?.Items?.Count ?? 0}"); 87 | 88 | // update default engine for channel 89 | SimpleFeedlyDatabaseAccess.UpdateChannelDefaultEngine(channel.Id, fetchFeedError != null ? RssCrawlerEngine.CodeHollowFeedReader : usedEngine); 90 | 91 | if (feed != null && feed?.Items != null) 92 | { 93 | var top5LatestItems = feed.Items 94 | .OrderByDescending(x => x.PublishingDate) 95 | .Take(5) 96 | .ToList(); 97 | 98 | if (top5LatestItems.Count == 0) 99 | { 100 | continue; 101 | } 102 | else 103 | { 104 | SimpleFeedlyDatabaseAccess.DeleteAllFeedItemByChannelId(channel.Id); 105 | _logger.Info($" - Deleted old items"); 106 | 107 | var insertItems = new List(); 108 | 109 | foreach (var fItem in top5LatestItems) 110 | { 111 | if (!StringUtils.IsUrl(fItem.Link)) 112 | { 113 | continue; 114 | } 115 | 116 | var feedItemKey = GenerateFeedItemKey(fItem); 117 | 118 | if (string.IsNullOrWhiteSpace(feedItemKey) || string.IsNullOrWhiteSpace(fItem.Link)) 119 | { 120 | continue; 121 | } 122 | 123 | var feedItem = new RssFeedItemRow 124 | { 125 | Channel = channel, 126 | FeedItemKey = feedItemKey, 127 | Title = string.IsNullOrWhiteSpace(fItem.Title) ? fItem.Link : fItem.Title, 128 | Link = fItem.Link, 129 | Description = fItem.Description, 130 | PublishingDate = fItem.PublishingDate, 131 | Author = fItem.Author 132 | }; 133 | 134 | var shrinkedTitle = StringUtils.UnsignString(StringUtils.RemoveNonAlphaCharactersAndDigit(feedItem.Title)).ToLower(); 135 | var shrinkedTitleHash = StringUtils.MD5Hash(shrinkedTitle); 136 | 137 | if (!SimpleFeedlyDatabaseAccess.IsBlackListWord(shrinkedTitleHash)) 138 | { 139 | var channelDomainGroup = string.IsNullOrEmpty(channel.DomainGroup) ? 
channel.Link : channel.DomainGroup; 140 | 141 | if (!SimpleFeedlyDatabaseAccess.IsExistedFeedItem(channel.Id, channelDomainGroup, feedItem.FeedItemKey)) 142 | { 143 | //var coverImageUrl = fItem.GetFeedCoverImage(); 144 | //if (!string.IsNullOrWhiteSpace(coverImageUrl)) 145 | //{ 146 | // feedItem.CoverImageUrl = coverImageUrl; 147 | //} 148 | 149 | insertItems.Add(feedItem); 150 | } 151 | } 152 | } 153 | 154 | SimpleFeedlyDatabaseAccess.InsertFeedItems(insertItems); 155 | 156 | _logger.Info($" - Inserted {insertItems.Count()} items"); 157 | } 158 | 159 | SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus(channel.Id, false, null); 160 | _logger.Info($" - Updated status"); 161 | } 162 | else 163 | { 164 | _logger.Info($" - [NO ITEMS]"); 165 | SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus(channel.Id, true, fetchFeedError == null ? null : JsonConvert.SerializeObject(fetchFeedError)); 166 | 167 | if (fetchFeedError != null) 168 | { 169 | ErrorHandle(fetchFeedError, feedUrl); 170 | } 171 | } 172 | } 173 | catch (Exception err) 174 | { 175 | _logger.Error($" - Got Error: {JsonConvert.SerializeObject(err, new JsonSerializerSettings { ReferenceLoopHandling = ReferenceLoopHandling.Ignore })}"); 176 | 177 | SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus(channel.Id, true, JsonConvert.SerializeObject(err)); 178 | ErrorHandle(err, feedUrl); 179 | } 180 | } 181 | } 182 | catch (Exception ex) 183 | { 184 | _logger.Error($" - [ERROR]: {JsonConvert.SerializeObject(ex, new JsonSerializerSettings { ReferenceLoopHandling = ReferenceLoopHandling.Ignore })}"); 185 | 186 | ErrorHandle(ex, feedUrl); 187 | } 188 | 189 | _logger.Info($"Done!"); 190 | } 191 | 192 | private static string GenerateFeedItemKey(SimpleFeedlyFeedItem item) 193 | { 194 | if (string.IsNullOrWhiteSpace(item.Id)) 195 | { 196 | if (string.IsNullOrWhiteSpace(item.Link)) 197 | { 198 | return null; 199 | } 200 | else 201 | { 202 | return item.Link; 203 | } 204 | } 205 | else 206 | { 207 | return item.Id; 208 | 
} 209 | } 210 | 211 | private static void ErrorHandle(Exception ex, string feedUrl) 212 | { 213 | // we can send an email for warning right here if needed 214 | 215 | // or just log error into some error stores, it's up to you 216 | //ErrorStore.LogExceptionWithoutContext(ex, false, false, 217 | // new Dictionary 218 | // { 219 | // {"feedUrl", feedUrl } 220 | // }); 221 | } 222 | 223 | /// 224 | /// GetFeedsFromChannel 225 | /// 226 | /// feed Url 227 | /// crawler engine 228 | /// 229 | /// Normally we call this method two times, first times with 'default' channel's crawler engine, and the last times for the rest crawler engine 230 | /// isRest = false: FIRST TIMES 231 | /// isRest = true: LAST TIMES 232 | /// 233 | /// 234 | /// 235 | /// 236 | public static SimpleFeedlyFeed GetFeedsFromChannel(string feedUrl, RssCrawlerEngine defaultEngineType, out RssCrawlerEngine engineTypeResult, out Exception error) 237 | { 238 | IRssEngine getEngine(RssCrawlerEngine type) 239 | { 240 | IRssEngine tmpEngine = null; 241 | switch (type) 242 | { 243 | case RssCrawlerEngine.SyndicationFeed: 244 | tmpEngine = new SyndicationFeedEngine(); 245 | break; 246 | case RssCrawlerEngine.CodeHollowFeedReader: 247 | tmpEngine = new CodeHollowFeedReaderEngine(); 248 | break; 249 | case RssCrawlerEngine.ParseRssByXml: 250 | tmpEngine = new ParseRssByXmlEngine(); 251 | break; 252 | default: 253 | 254 | break; 255 | } 256 | 257 | if (tmpEngine == null) 258 | { 259 | throw new Exception($"Can not find crawler engine for type <{type}>"); 260 | } 261 | 262 | return tmpEngine; 263 | } 264 | 265 | RssCrawlerEngine currentEngineType = RssCrawlerEngine.CodeHollowFeedReader; 266 | var items = new List(); 267 | 268 | try 269 | { 270 | // check default engine first 271 | IRssEngine rssEngine = getEngine(defaultEngineType); 272 | var feedItems = rssEngine.GetItems(feedUrl, out error); 273 | 274 | if (error == null && feedItems.Count > 0) // no error 275 | { 276 | currentEngineType = defaultEngineType; 
277 | items = feedItems ?? new List(); 278 | } 279 | else 280 | { 281 | // check the rest engines 282 | error = null; 283 | 284 | foreach (RssCrawlerEngine engineLoop in (RssCrawlerEngine[])Enum.GetValues(typeof(RssCrawlerEngine))) 285 | { 286 | if (engineLoop == defaultEngineType) 287 | { 288 | continue; 289 | } 290 | 291 | currentEngineType = engineLoop; 292 | 293 | rssEngine = getEngine(engineLoop); 294 | feedItems = rssEngine.GetItems(feedUrl, out error); 295 | 296 | items.AddRange(feedItems ?? new List()); 297 | 298 | if (error == null && feedItems.Count > 0) // no error 299 | { 300 | items = feedItems ?? new List(); 301 | break; 302 | } 303 | } 304 | } 305 | } 306 | catch (Exception ex) 307 | { 308 | error = ex; 309 | } 310 | 311 | engineTypeResult = currentEngineType; 312 | return new SimpleFeedlyFeed { Items = items ?? new List() }; 313 | } 314 | } 315 | 316 | 317 | public class SimpleFeedlyFeed 318 | { 319 | public SimpleFeedlyFeed() 320 | { 321 | Items = new List(); 322 | } 323 | 324 | public List Items { get; set; } 325 | } 326 | 327 | public class SimpleFeedlyFeedItem 328 | { 329 | public string Id { get; set; } 330 | public string Title { get; set; } 331 | public string Link { get; set; } 332 | public string Description { get; set; } 333 | public DateTime PublishingDate { get; set; } 334 | public string Author { get; set; } 335 | public string Content { get; set; } 336 | 337 | //public string XmlData { get; set; } 338 | 339 | public string GetFeedCoverImage() 340 | { 341 | string imageUrl = string.Empty; 342 | HtmlDocument doc = new HtmlDocument(); 343 | 344 | try 345 | { 346 | HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Link ?? 
string.Empty); 347 | HttpWebResponse response = (HttpWebResponse)request.GetResponse(); 348 | string pageSource = string.Empty; 349 | 350 | if (response.StatusCode == HttpStatusCode.OK) 351 | { 352 | Stream receiveStream = response.GetResponseStream(); 353 | StreamReader readStream; 354 | if (string.IsNullOrWhiteSpace(response.CharacterSet)) 355 | readStream = new StreamReader(receiveStream); 356 | else 357 | readStream = new StreamReader(receiveStream, Encoding.GetEncoding(response.CharacterSet)); 358 | 359 | pageSource = readStream.ReadToEnd(); 360 | 361 | response.Close(); 362 | readStream.Close(); 363 | } 364 | 365 | string metaImageRegexPattern = @"]*?property[\s]?=[\s""']+og:image[\s""']+content[\s]?=[\s""']+(.*?)[""']+.*?>"; 366 | var metaRegex = new Regex(metaImageRegexPattern, RegexOptions.IgnoreCase); 367 | var mDesc = metaRegex.Match(pageSource ?? string.Empty); 368 | if (mDesc.Success && mDesc.Groups.Count >= 2) 369 | { 370 | imageUrl = mDesc.Groups[1]?.Value ?? string.Empty; 371 | } 372 | } 373 | catch { } 374 | 375 | try 376 | { 377 | if (string.IsNullOrWhiteSpace(imageUrl)) 378 | { 379 | string xpath = "//meta[@property='og:image']"; 380 | doc = new HtmlWeb().Load(Link ?? string.Empty); 381 | var ogImage = doc.DocumentNode.SelectSingleNode(xpath); 382 | 383 | imageUrl = ogImage?.Attributes["content"]?.Value?.ToString() ?? string.Empty; 384 | } 385 | } 386 | catch 387 | { 388 | 389 | } 390 | 391 | try 392 | { 393 | if (string.IsNullOrWhiteSpace(imageUrl)) 394 | { 395 | //string pattern = @"]*?src=('|')([^'']+)\1"; 396 | string pattern = ""; 397 | Regex myRegex = new Regex(pattern, RegexOptions.IgnoreCase); 398 | 399 | // use Desc 400 | if (!string.IsNullOrWhiteSpace(Description)) 401 | { 402 | try 403 | { 404 | var mDesc = myRegex.Match(Description); 405 | 406 | if (mDesc.Success && mDesc.Groups.Count >= 2) 407 | { 408 | imageUrl = mDesc.Groups[1]?.Value ?? 
string.Empty; 409 | } 410 | } 411 | catch { } 412 | } 413 | else 414 | { 415 | // use Content 416 | if (!string.IsNullOrWhiteSpace(Content)) 417 | { 418 | try 419 | { 420 | Match mContent = myRegex.Match(Content); 421 | 422 | if (mContent.Success && mContent.Groups.Count >= 2) 423 | { 424 | imageUrl = mContent.Groups[1]?.Value ?? string.Empty; 425 | } 426 | } 427 | catch { } 428 | } 429 | else 430 | { 431 | // last chance: full html 432 | if (doc != null && !string.IsNullOrWhiteSpace(doc?.Text)) 433 | { 434 | Match m = myRegex.Match(doc.Text); 435 | 436 | if (m.Success && m.Groups.Count >= 2) 437 | { 438 | imageUrl = m.Groups[1]?.Value ?? string.Empty; 439 | } 440 | } 441 | } 442 | } 443 | } 444 | } 445 | catch 446 | { 447 | 448 | } 449 | 450 | //if (!string.IsNullOrWhiteSpace(XmlData)) 451 | //{ 452 | // try 453 | // { 454 | // Match m = myRegex.Match(XmlData); 455 | 456 | // if (m.Success && m.Groups.Count >= 2) 457 | // { 458 | // var tmp = m.Groups[1]?.Value ?? string.Empty; 459 | // if (!string.IsNullOrWhiteSpace(tmp)) 460 | // { 461 | // return tmp; 462 | // } 463 | // } 464 | // } 465 | // catch { } 466 | //} 467 | 468 | if (!string.IsNullOrWhiteSpace(imageUrl)) 469 | { 470 | if (!imageUrl.StartsWith("http")) 471 | { 472 | Uri pageUri = new Uri(Link); 473 | return $"{pageUri.Scheme + Uri.SchemeDelimiter + pageUri.Host + ":" + pageUri.Port}/{imageUrl.TrimStart('/')}"; 474 | } 475 | else 476 | { 477 | return imageUrl; 478 | } 479 | } 480 | 481 | return string.Empty; 482 | } 483 | } 484 | 485 | public interface IRssEngine 486 | { 487 | List GetItems(string feedUrl, out Exception error); 488 | } 489 | 490 | internal class CodeHollowFeedReaderEngine : IRssEngine 491 | { 492 | public List GetItems(string feedUrl, out Exception error) 493 | { 494 | Exception currentEx = null; 495 | List items = new List(); 496 | 497 | try 498 | { 499 | var feed = CodeHollow.FeedReader.FeedReader.ReadAsync(feedUrl).GetAwaiter().GetResult(); 500 | 501 | foreach (var item in 
feed.Items) 502 | { 503 | var feedItem = new SimpleFeedlyFeedItem 504 | { 505 | Id = item.Id, 506 | Title = string.IsNullOrWhiteSpace(item.Title) ? item.Link : item.Title, 507 | Link = item.Link, 508 | Description = item.Description, 509 | PublishingDate = item.PublishingDate ?? DateTime.Now, 510 | Author = item.Author, 511 | Content = item.Content 512 | }; 513 | 514 | items.Add(feedItem); 515 | } 516 | } 517 | catch (Exception ex) 518 | { 519 | currentEx = ex; 520 | } 521 | 522 | error = currentEx; 523 | return items; 524 | } 525 | } 526 | 527 | internal class SyndicationFeedEngine : IRssEngine 528 | { 529 | public List GetItems(string feedUrl, out Exception error) 530 | { 531 | Exception currentEx = null; 532 | List items = new List(); 533 | XmlReaderSettings settings = new XmlReaderSettings(); 534 | settings.DtdProcessing = DtdProcessing.Parse; 535 | 536 | try 537 | { 538 | using (var reader = XmlReader.Create(feedUrl, settings)) 539 | { 540 | var feed = System.ServiceModel.Syndication.SyndicationFeed.Load(reader); 541 | reader.Close(); 542 | 543 | foreach (System.ServiceModel.Syndication.SyndicationItem item in feed.Items) 544 | { 545 | var feedItem = new SimpleFeedlyFeedItem(); 546 | 547 | var link = item.Links.FirstOrDefault()?.Uri.ToString(); 548 | link = string.IsNullOrWhiteSpace(link) ? item.Id : link; 549 | 550 | feedItem.Id = item.Id; 551 | feedItem.Title = string.IsNullOrWhiteSpace(item.Title?.Text) ? link : item.Title.Text; 552 | feedItem.Link = link; 553 | feedItem.Description = item.Summary?.Text; 554 | feedItem.PublishingDate = item.PublishDate.UtcDateTime; 555 | feedItem.Author = item.Authors.FirstOrDefault()?.Name ?? 
string.Empty; 556 | feedItem.Content = item.Content?.ToString(); 557 | 558 | //feedItem.XmlData = item.GetRss20Formatter().ToString(); 559 | 560 | items.Add(feedItem); 561 | } 562 | } 563 | } 564 | catch (Exception ex) 565 | { 566 | currentEx = ex; 567 | } 568 | 569 | error = currentEx; 570 | return items; 571 | } 572 | } 573 | 574 | internal class ParseRssByXmlEngine : IRssEngine 575 | { 576 | public List GetItems(string feedUrl, out Exception error) 577 | { 578 | Exception currentEx = null; 579 | List items = new List(); 580 | 581 | try 582 | { 583 | var xmlString = string.Empty; 584 | using (WebClient client = new WebClient()) 585 | { 586 | var htmlData = client.DownloadData(feedUrl); 587 | xmlString = System.Text.Encoding.UTF8.GetString(htmlData); 588 | 589 | // ReplaceHexadecimalSymbols 590 | string r = "[\x00-\x08\x0B\x0C\x0E-\x1F\x26]"; 591 | xmlString = Regex.Replace(xmlString, r, "", RegexOptions.Compiled); 592 | } 593 | 594 | XmlDocument rssXmlDoc = new XmlDocument(); 595 | rssXmlDoc.LoadXml(xmlString); 596 | 597 | // Parse the Items in the RSS file 598 | XmlNodeList rssNodes = rssXmlDoc.SelectNodes("rss/channel/item"); 599 | 600 | var namespaceManager = new XmlNamespaceManager(rssXmlDoc.NameTable); 601 | var contentNamespace = rssXmlDoc.DocumentElement.GetAttribute("xmlns:content"); 602 | namespaceManager.AddNamespace("content", contentNamespace); 603 | 604 | // Iterate through the items in the RSS file 605 | foreach (XmlNode rssNode in rssNodes) 606 | { 607 | var feedItem = new SimpleFeedlyFeedItem(); 608 | 609 | XmlNode rssSubNode = rssNode.SelectSingleNode("link"); 610 | feedItem.Link = rssSubNode != null ? rssSubNode.InnerText : null; 611 | 612 | rssSubNode = rssNode.SelectSingleNode("title"); 613 | feedItem.Title = rssSubNode != null ? rssSubNode.InnerText : null; 614 | feedItem.Title = string.IsNullOrWhiteSpace(feedItem.Title) ? 
feedItem.Link : feedItem.Title; 615 | 616 | rssSubNode = rssNode.SelectSingleNode("description"); 617 | feedItem.Description = rssSubNode != null ? rssSubNode.InnerText : null; 618 | 619 | rssSubNode = rssNode.SelectSingleNode("//content:encoded", namespaceManager); 620 | feedItem.Content = rssSubNode != null ? rssSubNode.InnerText : null; 621 | 622 | rssSubNode = rssNode.SelectSingleNode("pubDate"); 623 | DateTime pubDate = DateTime.Now; 624 | 625 | if (rssSubNode != null) 626 | { 627 | if (DateTime.TryParse(rssSubNode.InnerText, out DateTime tmpDate)) 628 | { 629 | pubDate = tmpDate; 630 | } 631 | } 632 | 633 | feedItem.PublishingDate = pubDate; 634 | 635 | 636 | //feedItem.XmlData = rssNode.InnerXml.ToString(); 637 | 638 | if (!string.IsNullOrWhiteSpace(feedItem.Link)) 639 | { 640 | items.Add(feedItem); 641 | } 642 | } 643 | } 644 | catch (Exception ex) 645 | { 646 | currentEx = ex; 647 | } 648 | 649 | error = currentEx; 650 | return items; 651 | } 652 | } 653 | 654 | } 655 | -------------------------------------------------------------------------------- /RssCrawler/RssCrawler.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | netcoreapp3.1 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /RssCrawler/RssCrawler.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30204.135 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RssCrawler", "RssCrawler.csproj", "{676BAF4F-EAB1-432F-8476-1B6299347266}" 7 | EndProject 8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{6189F359-BD66-40C6-BA04-AA02F98DE14A}" 9 | EndProject 10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") 
= "workflows", "workflows", "{FE137A13-6E58-4F27-8D9D-940C86112465}" 11 | ProjectSection(SolutionItems) = preProject 12 | ..\.github\workflows\crawl-rss.yml = ..\.github\workflows\crawl-rss.yml 13 | EndProjectSection 14 | EndProject 15 | Global 16 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 17 | Debug|Any CPU = Debug|Any CPU 18 | Release|Any CPU = Release|Any CPU 19 | EndGlobalSection 20 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 21 | {676BAF4F-EAB1-432F-8476-1B6299347266}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 22 | {676BAF4F-EAB1-432F-8476-1B6299347266}.Debug|Any CPU.Build.0 = Debug|Any CPU 23 | {676BAF4F-EAB1-432F-8476-1B6299347266}.Release|Any CPU.ActiveCfg = Release|Any CPU 24 | {676BAF4F-EAB1-432F-8476-1B6299347266}.Release|Any CPU.Build.0 = Release|Any CPU 25 | EndGlobalSection 26 | GlobalSection(SolutionProperties) = preSolution 27 | HideSolutionNode = FALSE 28 | EndGlobalSection 29 | GlobalSection(NestedProjects) = preSolution 30 | {FE137A13-6E58-4F27-8D9D-940C86112465} = {6189F359-BD66-40C6-BA04-AA02F98DE14A} 31 | EndGlobalSection 32 | GlobalSection(ExtensibilityGlobals) = postSolution 33 | SolutionGuid = {33DCE606-3D32-42A5-91EF-6C58F2800E7C} 34 | EndGlobalSection 35 | EndGlobal 36 | -------------------------------------------------------------------------------- /RssCrawler/RssCrawlerEngine.cs: -------------------------------------------------------------------------------- 1 | namespace RssCrawler 2 | { 3 | public enum RssCrawlerEngine 4 | { 5 | SyndicationFeed = 1, 6 | CodeHollowFeedReader = 2, 7 | ParseRssByXml = 3 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /RssCrawler/SimpleFeedlyDatabaseAccess.cs: -------------------------------------------------------------------------------- 1 | using LiteDB; 2 | using LiteDB.Engine; 3 | using RssCrawler.Models; 4 | using RssCrawler.Utils; 5 | using System; 6 | using System.Collections.Generic; 7 | using System.IO; 8 | 
using System.Linq;

namespace RssCrawler
{
    /// <summary>
    /// Static data-access helpers for the LiteDB file named by <see cref="DbName"/>.
    /// Every method opens its own <see cref="LiteDatabase"/> on <see cref="GetDbPath"/>
    /// and disposes it before returning, so no connection is shared between calls.
    /// </summary>
    public class SimpleFeedlyDatabaseAccess
    {
        public const string DbName = "RssCrawler.db";

        // Collection names used inside the database file.
        private const string ChannelsCollection = "channels";
        private const string FeedItemsCollection = "feedItems";
        private const string BlacklistsCollection = "blacklists";

        /// <summary>
        /// Builds the path of the database file: one directory above the
        /// application root reported by <see cref="EnvironmentHelper.GetApplicationRoot"/>.
        /// </summary>
        /// <returns>The combined (not normalized) path to <see cref="DbName"/>.</returns>
        public static string GetDbPath()
        {
            var rootPath = EnvironmentHelper.GetApplicationRoot();

            return Path.Combine(rootPath, "../", DbName);
        }

        /// <summary>
        /// Rebuilds the database file via <c>LiteDatabase.Rebuild()</c>.
        /// </summary>
        public static void Shrink()
        {
            using (var db = new LiteDatabase(GetDbPath()))
            {
                db.Rebuild();
            }
        }

        /// <summary>
        /// Returns every channel whose <c>IsActive</c> equals 1.
        /// </summary>
        /// <returns>A materialized list; empty when nothing matches, never null.</returns>
        public static List<RssChannelRow> GetActiveChannels()
        {
            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<RssChannelRow>(ChannelsCollection);

                // Create unique index in Link field
                col.EnsureIndex(x => x.Link, true);

                // Materialize before the database is disposed.
                return col.Find(x => x.IsActive == 1).ToList();
            }
        }

        /// <summary>
        /// Stores <paramref name="engine"/> as the crawler engine of the given channel.
        /// Does nothing when <paramref name="channelId"/> is null or no such channel exists.
        /// </summary>
        /// <param name="channelId">Id of the channel to update.</param>
        /// <param name="engine">Engine value written to the channel row.</param>
        public static void UpdateChannelDefaultEngine(ObjectId channelId, RssCrawlerEngine engine)
        {
            if (channelId == null)
            {
                return;
            }

            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<RssChannelRow>(ChannelsCollection);

                // Create unique index in Id field
                col.EnsureIndex(x => x.Id, true);

                // UPDATE dbo.RssChannels SET RssCrawlerEngine = @engine WHERE Id = @channelId
                var channel = col.FindById(channelId);
                if (channel is null)
                {
                    // Unknown id: nothing to update (previously a NullReferenceException).
                    return;
                }

                channel.RssCrawlerEngine = engine;
                col.Update(channel);
            }
        }

        /// <summary>
        /// Writes the error flag and message onto the given channel.
        /// Does nothing when <paramref name="channelId"/> is null or no such channel exists.
        /// </summary>
        /// <param name="channelId">Id of the channel to update.</param>
        /// <param name="isError">New value for the channel's <c>IsError</c>.</param>
        /// <param name="errorMessage">New value for the channel's <c>ErrorMessage</c>.</param>
        public static void UpdateChannelErrorStatus(ObjectId channelId, bool isError, string errorMessage)
        {
            if (channelId == null)
            {
                return;
            }

            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<RssChannelRow>(ChannelsCollection);
                var channel = col.FindById(channelId);
                if (channel is null)
                {
                    // Unknown id: nothing to update (previously a NullReferenceException).
                    return;
                }

                channel.IsError = isError;
                channel.ErrorMessage = errorMessage;

                col.Update(channel);
            }
        }

        /// <summary>
        /// Deletes every feed item whose channel id equals <paramref name="channelId"/>.
        /// </summary>
        /// <param name="channelId">Id of the channel whose items are removed.</param>
        public static void DeleteAllFeedItemByChannelId(ObjectId channelId)
        {
            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<RssFeedItemRow>(FeedItemsCollection);
                col.DeleteMany(x => x.Channel.Id == channelId);
            }
        }

        /// <summary>
        /// Inserts one feed item, defaulting a missing publishing date to the current local time.
        /// A null <paramref name="item"/> is ignored, mirroring <see cref="InsertFeedItems"/>.
        /// </summary>
        /// <param name="item">The item to insert; its <c>PublishingDate</c> may be modified.</param>
        public static void InsertFeedItem(RssFeedItemRow item)
        {
            if (item == null)
            {
                return;
            }

            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<RssFeedItemRow>(FeedItemsCollection);

                EnsurePublishingDate(item);

                col.Insert(item);
            }
        }

        /// <summary>
        /// Bulk-inserts feed items, defaulting each missing publishing date to the current local time.
        /// Does nothing when <paramref name="items"/> is null or empty.
        /// </summary>
        /// <param name="items">Items to insert; their <c>PublishingDate</c> may be modified.</param>
        public static void InsertFeedItems(List<RssFeedItemRow> items)
        {
            if (items == null || items.Count == 0)
            {
                return;
            }

            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<RssFeedItemRow>(FeedItemsCollection);

                foreach (var item in items)
                {
                    EnsurePublishingDate(item);
                }

                col.InsertBulk(items);
            }
        }

        /// <summary>
        /// Loads all feed items that have a channel with a non-null id, with the channel
        /// reference included, ordered by channel id and then by newest publishing date first.
        /// </summary>
        /// <returns>A materialized list; empty when there are no such items, never null.</returns>
        public static List<RssFeedItemRow> GetAllFeedItems()
        {
            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<RssFeedItemRow>(FeedItemsCollection);

                // Filtering and sorting run in memory over FindAll(); ToList() materializes
                // the result before the database is disposed.
                return col
                    .Include(x => x.Channel)
                    .FindAll()
                    .Where(x => x?.Channel?.Id != null)
                    .OrderBy(x => x.Channel.Id)
                    .ThenByDescending(x => x.PublishingDate)
                    .ToList();
            }
        }

        /// <summary>
        /// Checks whether the blacklist holds an entry whose <c>ShrinkedTitleHash</c>
        /// equals <paramref name="md5String"/>.
        /// </summary>
        /// <param name="md5String">Hash value to look up.</param>
        /// <returns><c>true</c> when a matching blacklist row exists.</returns>
        public static bool IsBlackListWord(string md5String)
        {
            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<BlacklistRow>(BlacklistsCollection);

                return col.Exists(x => x.ShrinkedTitleHash == md5String);
            }
        }

        /// <summary>
        /// Checks whether a feed item with the given key already exists for the channel.
        /// The channel's domain group falls back to its <c>Link</c> when <c>DomainGroup</c> is null.
        /// </summary>
        /// <param name="channelId">Id the item's channel must have.</param>
        /// <param name="channelDomainGroup">Expected domain group (or link) of the channel.</param>
        /// <param name="feedItemKey">Expected <c>FeedItemKey</c> of the item.</param>
        /// <returns><c>true</c> when a matching feed item exists.</returns>
        public static bool IsExistedFeedItem(ObjectId channelId, string channelDomainGroup, string feedItemKey)
        {
            using (var db = new LiteDatabase(GetDbPath()))
            {
                var col = db.GetCollection<RssFeedItemRow>(FeedItemsCollection);

                return col
                    .Include(x => x.Channel)
                    .Exists(x => x.Channel.Id == channelId && (x.Channel.DomainGroup == null ? x.Channel.Link : x.Channel.DomainGroup) == channelDomainGroup && x.FeedItemKey == feedItemKey);
            }
        }

        // Replaces a missing (null or DateTime.MinValue) publishing date with the current local time.
        private static void EnsurePublishingDate(RssFeedItemRow item)
        {
            //item.RssChannelDomainGroup = string.IsNullOrEmpty(item.RssChannelDomainGroup) ? item.Link : item.RssChannelDomainGroup;
            if (item.PublishingDate == null || item.PublishingDate == DateTime.MinValue)
            {
                item.PublishingDate = DateTime.Now;
            }
        }
    }
}