├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── RSSFeedAggregator.sln └── RSSFeedAggregator ├── .gitignore ├── Properties ├── launchSettings.json ├── serviceDependencies.json └── serviceDependencies.local.json ├── Queries ├── Articles_FindDuplicates.sql ├── Articles_FindUseless.sql ├── Articles_Latest.sql ├── Counter_ArticleDuplicates.sql ├── Counter_Reporter.sql ├── Errors_Unhandled.sql ├── Ingested_Over.sql ├── Ingested_Under.sql ├── RSSFeeds_Insert.sql ├── RSSFeeds_TurnedOff.sql ├── RSSFeeds_TurnedOn.sql ├── ReadTier_Distribution.sql ├── Reporter_FindMissing.sql ├── Reporter_LastInsert.sql ├── Reporter_UselessArticleCount.sql ├── TopArticles_HighestScore.sql └── TopArticles_MainPageScoring.sql ├── RSSFeedAggregator.cs ├── RSSFeedAggregator.csproj ├── StoredProcedures ├── NewsFeed.sql ├── NewsFeed_AddComment.sql ├── NewsFeed_AddStarredArticle.sql ├── NewsFeed_DeleteBotStarred.sql ├── NewsFeed_DisplayComments.sql ├── NewsFeed_LogSearch.sql ├── NewsFeed_MainFeed.sql └── RSSFunctionApp.sql ├── TableSchemas ├── NewsFeed_ArticleComments.sql ├── NewsFeed_BannedTrends.sql ├── NewsFeed_NewsArticles.sql ├── NewsFeed_SearchHistory.sql ├── NewsFeed_StarredArticles.sql ├── NewsFeed_Tags.sql ├── RSSFunctionApp_ErrorLogs.sql ├── RSSFunctionApp_IngestionLogs.sql ├── RSSFunctionApp_RSSFeeds.sql └── TopArticles_StarbotKeywords.sql └── host.json /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 
11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Oo]ut/ 33 | [Ll]og/ 34 | [Ll]ogs/ 35 | 36 | # Visual Studio 2015/2017 cache/options directory 37 | .vs/ 38 | # Uncomment if you have tasks that create the project's static files in wwwroot 39 | #wwwroot/ 40 | 41 | # Visual Studio 2017 auto generated files 42 | Generated\ Files/ 43 | 44 | # MSTest test Results 45 | [Tt]est[Rr]esult*/ 46 | [Bb]uild[Ll]og.* 47 | 48 | # NUnit 49 | *.VisualState.xml 50 | TestResult.xml 51 | nunit-*.xml 52 | 53 | # Build Results of an ATL Project 54 | [Dd]ebugPS/ 55 | [Rr]eleasePS/ 56 | dlldata.c 57 | 58 | # Benchmark Results 59 | BenchmarkDotNet.Artifacts/ 60 | 61 | # .NET Core 62 | project.lock.json 63 | project.fragment.lock.json 64 | artifacts/ 65 | 66 | # ASP.NET Scaffolding 67 | ScaffoldingReadMe.txt 68 | 69 | # 
StyleCop 70 | StyleCopReport.xml 71 | 72 | # Files built by Visual Studio 73 | *_i.c 74 | *_p.c 75 | *_h.h 76 | *.ilk 77 | *.meta 78 | *.obj 79 | *.iobj 80 | *.pch 81 | *.pdb 82 | *.ipdb 83 | *.pgc 84 | *.pgd 85 | *.rsp 86 | *.sbr 87 | *.tlb 88 | *.tli 89 | *.tlh 90 | *.tmp 91 | *.tmp_proj 92 | *_wpftmp.csproj 93 | *.log 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 
180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
295 | *.vbw 296 | 297 | # Visual Studio LightSwitch build output 298 | **/*.HTMLClient/GeneratedArtifacts 299 | **/*.DesktopClient/GeneratedArtifacts 300 | **/*.DesktopClient/ModelManifest.xml 301 | **/*.Server/GeneratedArtifacts 302 | **/*.Server/ModelManifest.xml 303 | _Pvt_Extensions 304 | 305 | # Paket dependency manager 306 | .paket/paket.exe 307 | paket-files/ 308 | 309 | # FAKE - F# Make 310 | .fake/ 311 | 312 | # CodeRush personal settings 313 | .cr/personal 314 | 315 | # Python Tools for Visual Studio (PTVS) 316 | __pycache__/ 317 | *.pyc 318 | 319 | # Cake - Uncomment if you are using it 320 | # tools/** 321 | # !tools/packages.config 322 | 323 | # Tabs Studio 324 | *.tss 325 | 326 | # Telerik's JustMock configuration file 327 | *.jmconfig 328 | 329 | # BizTalk build output 330 | *.btp.cs 331 | *.btm.cs 332 | *.odx.cs 333 | *.xsd.cs 334 | 335 | # OpenCover UI analysis results 336 | OpenCover/ 337 | 338 | # Azure Stream Analytics local run output 339 | ASALocalRun/ 340 | 341 | # MSBuild Binary and Structured Log 342 | *.binlog 343 | 344 | # NVidia Nsight GPU debugger configuration file 345 | *.nvuser 346 | 347 | # MFractors (Xamarin productivity tool) working folder 348 | .mfractor/ 349 | 350 | # Local History for Visual Studio 351 | .localhistory/ 352 | 353 | # BeatPulse healthcheck temp database 354 | healthchecksdb 355 | 356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 357 | MigrationBackup/ 358 | 359 | # Ionide (cross platform F# VS Code tools) working folder 360 | .ionide/ 361 | 362 | # Fody - auto-generated XML schema 363 | FodyWeavers.xsd -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Kyler Condran 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), 
to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RSS Feed Aggregator 2 | 3 | RSS Feed Aggregator is a database-driven news feed aggregation application built on Azure Functions. It contains the backend features for the administration of a news aggregation website. If you want to build your own news website, this is a great place to start! 4 | 5 | 6 | 7 | ## Download 8 | Code available on GitHub [Download](https://github.com/KylerCondran/RSSFeedAggregator) 9 | 10 | ## Getting Started 11 | 12 | 1. Acquire an [Azure Functions](https://azure.microsoft.com/) instance; it is free for new signups for 30 days. If you want to instead host the application yourself, you can convert it to a C# console application. 13 | 14 | 2. Acquire an MS SQL Server instance where you can host a database. Once you have a database, you can use the .sql files in the TableSchemas folder to create the initial tables.
Also run the .sql files in the StoredProcedures folder to create the Stored Procedures. 15 | 16 | 3. Add your database connection string in the initialization method. Also set the user agent string and paywall remover website if interested. 17 | 18 | 4. The main run method contains a [NCRONTAB Timer Trigger](https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-timer) "0 0 * * * *" (the top of every hour); this controls how often the application is executed. 19 | 20 | 5. The main run method contains time wheels, switch statements that break up the workload of downloading large volumes of RSS feeds when executed during certain hours of the day. Each readtier is a block of RSS Feeds which will be downloaded together in batches. It is a good idea to spread RSS Feeds out uniformly in these tiers for best performance. 21 | 22 | - ReadTiers 1-4: Fast - check every 4 hours (6 times per day) 23 | - ReadTiers 5-10: Regular - check every 6 hours (4 times per day) 24 | - ReadTiers 11-13: Slow - check every 12 hours (2 times per day) 25 | - ReadTier 14: Daily - check every 24 hours (1 time per day) 26 | 27 | 6. Use the RSSFeeds_Insert.sql file in the Queries folder to add RSS feeds to the RSSFunctionApp_RSSFeeds Table. Set enabled to 1 to turn it on, set the compareall flag if the RSS feed serves links from other providers, set the removepaywall flag if the feed contains content that is paywalled, and set the ingestionlogging flag when first activating to monitor whether you are under consuming or over consuming the feed in the RSSFunctionApp_IngestionLogs Table. You can speed the consumption rate up by decreasing the readtier number, or slow it down by increasing the readtier number, depending on how often the RSS feed posts new content. It is best to not over consume the feed so you do not risk getting blocked. 28 | 29 | 7. Trends are generated every 12 hours in the NewsFeed_Tags table by reading every news article title in the database and picking out the 30 most frequent words.
If you do not want a trend to appear in the trend table, add the word to the NewsFeed_BannedTrends table. 30 | 31 | 8. Article title scoring is done using lexicon-based sentiment analysis when keywords are added to the TopArticles_StarbotKeywords table. There are 12 tiers a keyword can have: tiers 1-6 are positive scoring words, and tiers 7-12 are negative scoring words. The total score for the article title is calculated and added with the news article record to the database so it can be configured to appear higher or lower in a website feed. 32 | 33 | - Tier 1: 13 points 34 | - Tier 2: 11 points 35 | - Tier 3: 9 points 36 | - Tier 4: 5 points 37 | - Tier 5: 3 points 38 | - Tier 6: 1 point 39 | - Tier 7: -1 point 40 | - Tier 8: -3 points 41 | - Tier 9: -5 points 42 | - Tier 10: -9 points 43 | - Tier 11: -11 points 44 | - Tier 12: -13 points 45 | 46 | This allows you to score news articles based on what you are most interested in and what you are not interested in. This list is completely subjective and is to be designed by you based on what news articles are most interesting to you. It is best to add several thousand words to this table so that articles are scored effectively. 47 | 48 | 9. After your scoring system is set up, periodically bots will go through and favorite articles that have the highest scores. There are currently 4 bots. Bot one will favorite an article once an hour. Bot two will favorite an article four times a day. Bot three will favorite an article twice a day. Bot four will favorite an article once per day. The same numbered bot will never favorite the same article twice. This causes a staggering effect that can easily be overpowered by legitimate users and causes interesting articles to be shifted towards the top to potentially catch a user's interest. 49 | 50 | 10. Once per day administrative tasks are performed: records older than 2 weeks are deleted from the database. This can be configured as desired in the RSSFunctionApp stored procedure.
51 | 52 | ## Questions? 53 | 54 | Feel free to contact me with any questions! 55 | 56 | You can reach me on [LinkedIn](https://www.linkedin.com/in/kylercondran/) or check out my [GitHub](https://github.com/KylerCondran/) for my other projects. 57 | -------------------------------------------------------------------------------- /RSSFeedAggregator.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.6.33829.357 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RSSFeedAggregator", "RSSFeedAggregator\RSSFeedAggregator.csproj", "{3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {EFE3767C-28C0-471A-9910-DCCA2FAF749E} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /RSSFeedAggregator/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | 4 | # Azure Functions localsettings file 5 | local.settings.json 6 | 7 | # User-specific files 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Dd]ebugPublic/ 19 | [Rr]elease/ 20 | [Rr]eleases/ 21 | x64/ 22 | x86/ 23 | bld/ 24 | [Bb]in/ 25 | [Oo]bj/ 26 | [Ll]og/ 27 | 28 | # Visual Studio 2015 cache/options directory 29 | .vs/ 30 | # Uncomment if you have tasks that create the project's static files in wwwroot 31 | #wwwroot/ 32 | 33 | # MSTest test Results 34 | [Tt]est[Rr]esult*/ 35 | [Bb]uild[Ll]og.* 36 | 37 | # NUNIT 38 | *.VisualState.xml 39 | TestResult.xml 40 | 41 | # Build Results of an ATL Project 42 | [Dd]ebugPS/ 43 | [Rr]eleasePS/ 44 | dlldata.c 45 | 46 | # DNX 47 | project.lock.json 48 | project.fragment.lock.json 49 | artifacts/ 50 | 51 | *_i.c 52 | *_p.c 53 | *_i.h 54 | *.ilk 55 | *.meta 56 | *.obj 57 | *.pch 58 | *.pdb 59 | *.pgc 60 | *.pgd 61 | *.rsp 62 | *.sbr 63 | *.tlb 64 | *.tli 65 | *.tlh 66 | *.tmp 67 | *.tmp_proj 68 | *.log 69 | *.vspscc 70 | *.vssscc 71 | .builds 72 | *.pidb 73 | *.svclog 74 | *.scc 75 | 76 | # Chutzpah Test files 77 | _Chutzpah* 78 | 79 | # Visual C++ cache files 80 | ipch/ 81 | *.aps 82 | *.ncb 83 | *.opendb 84 | *.opensdf 85 | *.sdf 86 | *.cachefile 87 | *.VC.db 88 | *.VC.VC.opendb 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | *.vspx 94 | *.sap 95 | 96 | # TFS 2012 Local Workspace 97 | $tf/ 98 | 99 | # Guidance Automation Toolkit 100 | *.gpState 101 | 102 | # ReSharper is a .NET coding add-in 103 | _ReSharper*/ 104 | *.[Rr]e[Ss]harper 105 | *.DotSettings.user 106 | 107 | # JustCode is a .NET coding add-in 108 | .JustCode 109 | 110 | # TeamCity is a build add-in 111 | _TeamCity* 112 | 113 | # DotCover is a Code Coverage Tool 114 | *.dotCover 115 | 116 | # NCrunch 117 | _NCrunch_* 118 | .*crunch*.local.xml 119 | nCrunchTemp_* 120 | 121 | # MightyMoose 122 | *.mm.* 123 | 
AutoTest.Net/ 124 | 125 | # Web workbench (sass) 126 | .sass-cache/ 127 | 128 | # Installshield output folder 129 | [Ee]xpress/ 130 | 131 | # DocProject is a documentation generator add-in 132 | DocProject/buildhelp/ 133 | DocProject/Help/*.HxT 134 | DocProject/Help/*.HxC 135 | DocProject/Help/*.hhc 136 | DocProject/Help/*.hhk 137 | DocProject/Help/*.hhp 138 | DocProject/Help/Html2 139 | DocProject/Help/html 140 | 141 | # Click-Once directory 142 | publish/ 143 | 144 | # Publish Web Output 145 | *.[Pp]ublish.xml 146 | *.azurePubxml 147 | # TODO: Comment the next line if you want to checkin your web deploy settings 148 | # but database connection strings (with potential passwords) will be unencrypted 149 | #*.pubxml 150 | *.publishproj 151 | 152 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 153 | # checkin your Azure Web App publish settings, but sensitive information contained 154 | # in these scripts will be unencrypted 155 | PublishScripts/ 156 | 157 | # NuGet Packages 158 | *.nupkg 159 | # The packages folder can be ignored because of Package Restore 160 | **/packages/* 161 | # except build/, which is used as an MSBuild target. 
162 | !**/packages/build/ 163 | # Uncomment if necessary however generally it will be regenerated when needed 164 | #!**/packages/repositories.config 165 | # NuGet v3's project.json files produce more ignorable files 166 | *.nuget.props 167 | *.nuget.targets 168 | 169 | # Microsoft Azure Build Output 170 | csx/ 171 | *.build.csdef 172 | 173 | # Microsoft Azure Emulator 174 | ecf/ 175 | rcf/ 176 | 177 | # Windows Store app package directories and files 178 | AppPackages/ 179 | BundleArtifacts/ 180 | Package.StoreAssociation.xml 181 | _pkginfo.txt 182 | 183 | # Visual Studio cache files 184 | # files ending in .cache can be ignored 185 | *.[Cc]ache 186 | # but keep track of directories ending in .cache 187 | !*.[Cc]ache/ 188 | 189 | # Others 190 | ClientBin/ 191 | ~$* 192 | *~ 193 | *.dbmdl 194 | *.dbproj.schemaview 195 | *.jfm 196 | *.pfx 197 | *.publishsettings 198 | node_modules/ 199 | orleans.codegen.cs 200 | 201 | # Since there are multiple workflows, uncomment next line to ignore bower_components 202 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 203 | #bower_components/ 204 | 205 | # RIA/Silverlight projects 206 | Generated_Code/ 207 | 208 | # Backup & report files from converting an old project file 209 | # to a newer Visual Studio version.
Backup files are not needed, 210 | # because we have git ;-) 211 | _UpgradeReport_Files/ 212 | Backup*/ 213 | UpgradeLog*.XML 214 | UpgradeLog*.htm 215 | 216 | # SQL Server files 217 | *.mdf 218 | *.ldf 219 | 220 | # Business Intelligence projects 221 | *.rdl.data 222 | *.bim.layout 223 | *.bim_*.settings 224 | 225 | # Microsoft Fakes 226 | FakesAssemblies/ 227 | 228 | # GhostDoc plugin setting file 229 | *.GhostDoc.xml 230 | 231 | # Node.js Tools for Visual Studio 232 | .ntvs_analysis.dat 233 | 234 | # Visual Studio 6 build log 235 | *.plg 236 | 237 | # Visual Studio 6 workspace options file 238 | *.opt 239 | 240 | # Visual Studio LightSwitch build output 241 | **/*.HTMLClient/GeneratedArtifacts 242 | **/*.DesktopClient/GeneratedArtifacts 243 | **/*.DesktopClient/ModelManifest.xml 244 | **/*.Server/GeneratedArtifacts 245 | **/*.Server/ModelManifest.xml 246 | _Pvt_Extensions 247 | 248 | # Paket dependency manager 249 | .paket/paket.exe 250 | paket-files/ 251 | 252 | # FAKE - F# Make 253 | .fake/ 254 | 255 | # JetBrains Rider 256 | .idea/ 257 | *.sln.iml 258 | 259 | # CodeRush 260 | .cr/ 261 | 262 | # Python Tools for Visual Studio (PTVS) 263 | __pycache__/ 264 | *.pyc -------------------------------------------------------------------------------- /RSSFeedAggregator/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "RSSFeedAggregator": { 4 | "commandName": "Project", 5 | "commandLineArgs": "--port 7138", 6 | "launchBrowser": false 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /RSSFeedAggregator/Properties/serviceDependencies.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "appInsights1": { 4 | "type": "appInsights" 5 | }, 6 | "storage1": { 7 | "type": "storage", 8 | "connectionId": "AzureWebJobsStorage" 9 | } 10 | } 11 | } 
-------------------------------------------------------------------------------- /RSSFeedAggregator/Properties/serviceDependencies.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "appInsights1": { 4 | "type": "appInsights.sdk" 5 | }, 6 | "storage1": { 7 | "type": "storage.emulator", 8 | "connectionId": "AzureWebJobsStorage" 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Articles_FindDuplicates.sql: -------------------------------------------------------------------------------- 1 | /* This query finds which reporters have the most duplicates in the table */ 2 | SELECT Reporter, count(*) as total FROM [NewsFeed_NewsArticles] WHERE Article IN (SELECT Article 3 | FROM [NewsFeed_NewsArticles] 4 | GROUP BY Article 5 | HAVING COUNT(Article) > 1) group by reporter order by total desc -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Articles_FindUseless.sql: -------------------------------------------------------------------------------- 1 | /* This query finds which articles do not have any scoring - used to fine tune scoring by adding keywords to the TopArticles_StarbotKeywords table */ 2 | SELECT TOP (1000) [ID] 3 | ,[Reporter] 4 | ,[Article] 5 | FROM [NewsFeed_NewsArticles] WHERE StarBotScore = 0 AND datediff(hh,DateTime,GETDATE()) < 12 ORDER BY ID DESC -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Articles_Latest.sql: -------------------------------------------------------------------------------- 1 | /* This query finds the most recently downloaded articles */ 2 | SELECT TOP (1000) [ID] 3 | ,[Reporter] 4 | ,[Article] 5 | ,[URL] 6 | ,[Image] 7 | ,[DateTime] 8 | ,[Summary] 9 | ,[StarBotScore] 10 | FROM [NewsFeed_NewsArticles] ORDER BY ID DESC 11 | 
-------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Counter_ArticleDuplicates.sql: -------------------------------------------------------------------------------- 1 | /* This query finds which articles are most duplicated in the table */ 2 | SELECT [Article], Count(*) as [counter] 3 | FROM [NewsFeed_NewsArticles] 4 | group by [Article] 5 | order by [counter] desc 6 | -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Counter_Reporter.sql: -------------------------------------------------------------------------------- 1 | /* This query finds the total article count in the table and total article count broken down by reporter */ 2 | SELECT Count(*) AS [ArticleCount] FROM [NewsFeed_NewsArticles] 3 | SELECT [Reporter], Count(*) as [counter] 4 | FROM [NewsFeed_NewsArticles] 5 | group by [Reporter] 6 | order by [counter] desc 7 | -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Errors_Unhandled.sql: -------------------------------------------------------------------------------- 1 | /* This query finds the more serious errors - set handled flag to 1 after dealing with the error to ignore */ 2 | SELECT TOP (1000) [ID] 3 | ,[Type] 4 | ,[Message] 5 | ,[Source] 6 | ,[FeedName] 7 | ,[ArticleTitle] 8 | ,[Subroutine] 9 | ,[DateTime] 10 | ,[Handled] 11 | FROM [RSSFunctionApp_ErrorLogs] 12 | WHERE Handled = 0 AND Message NOT IN ('The operation has timed out.', 'The remote server returned an error: (503) Service Unavailable.', 13 | 'The remote server returned an error: (504) Gateway Time-out.','The remote server returned an error: (502) Bad Gateway.','The response ended prematurely.') AND Type NOT IN ('XmlException') 14 | ORDER BY DateTime DESC -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Ingested_Over.sql: 
-------------------------------------------------------------------------------- 1 | /* This query finds RSS Feeds that are being over-consumed (ingestion runs logged with an UploadedPercentage of 0) - increase the readtier number in the RSSFunctionApp_RSSFeeds table to slow down the consumption rate. Note: the [UploadedPercentage] result column is the number of such logged runs per feed, not a percentage */ 2 | SELECT FeedName, count(*) AS [UploadedPercentage] 3 | FROM [RSSFunctionApp_IngestionLogs] 4 | WHERE UploadedPercentage = '0' 5 | GROUP BY FeedName 6 | HAVING COUNT(*) > 0 7 | ORDER BY UploadedPercentage DESC -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Ingested_Under.sql: -------------------------------------------------------------------------------- 1 | /* This query finds RSS Feeds that are being under-consumed (ingestion runs logged with an UploadedPercentage of 100) - decrease the readtier number in the RSSFunctionApp_RSSFeeds table to speed up the consumption rate. Note: the [UploadedPercentage] result column is the number of such logged runs per feed, not a percentage */ 2 | SELECT FeedName, count(*) AS [UploadedPercentage] 3 | FROM [RSSFunctionApp_IngestionLogs] 4 | WHERE UploadedPercentage = '100' 5 | GROUP BY FeedName 6 | HAVING COUNT(*) > 0 7 | ORDER BY UploadedPercentage DESC -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/RSSFeeds_Insert.sql: -------------------------------------------------------------------------------- 1 | /* This query adds new RSS Feeds to the table */ 2 | INSERT INTO RSSFunctionApp_RSSFeeds (FeedName, URL, ReadTier, Enabled, Category, CompareAll, RemovePaywall, IngestionLogging) VALUES ('', '', 14, 0, '', 0, 0, 1) -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/RSSFeeds_TurnedOff.sql: -------------------------------------------------------------------------------- 1 | /* This query finds which RSS Feeds are currently disabled */ 2 | SELECT TOP (1000) [ID] 3 | ,[FeedName] 4 | ,[URL] 5 | ,[ReadTier] 6 | ,[Enabled] 7 | ,[CompareAll] 8 | ,[RemovePaywall] 9 | ,[IngestionLogging] 10 | ,[Category] 11 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 0 ORDER BY FeedName
ASC 12 | -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/RSSFeeds_TurnedOn.sql: -------------------------------------------------------------------------------- 1 | /* This query finds which RSS Feeds are currently enabled and shows what time they are downloaded, may have to be adjusted for timezone */ 2 | SELECT TOP (1000) [ID] 3 | ,[FeedName] 4 | ,[URL] 5 | ,[ReadTier] 6 | ,[Enabled] 7 | ,[CompareAll] 8 | ,[RemovePaywall] 9 | ,[IngestionLogging] 10 | ,CASE 11 | WHEN ReadTier = 1 THEN '7PM,11PM,3AM,7AM,11AM,3PM' 12 | WHEN ReadTier = 2 THEN '6PM,10PM,2AM,6AM,10AM,2PM' 13 | WHEN ReadTier = 3 THEN '9PM,1AM,5AM,9AM,1PM,5PM' 14 | WHEN ReadTier = 4 THEN '8PM,12AM,4AM,8AM,12PM,4PM' 15 | WHEN ReadTier = 5 THEN '5PM,11PM,5AM,11AM' 16 | WHEN ReadTier = 6 THEN '10PM,4AM,10AM,4PM' 17 | WHEN ReadTier = 7 THEN '9PM,3AM,9AM,3PM' 18 | WHEN ReadTier = 8 THEN '8PM,2AM,8AM,2PM' 19 | WHEN ReadTier = 9 THEN '7PM,1AM,7AM,1PM' 20 | WHEN ReadTier = 10 THEN '6PM,12AM,6AM,12PM' 21 | WHEN ReadTier = 11 THEN '4AM,4PM' 22 | WHEN ReadTier = 12 THEN '8AM,8PM' 23 | WHEN ReadTier = 13 THEN '12AM,12PM' 24 | WHEN ReadTier = 14 THEN '5AM' 25 | END AS [ReadTimes] 26 | ,[Category] 27 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 ORDER BY ReadTier, FeedName ASC 28 | -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/ReadTier_Distribution.sql: -------------------------------------------------------------------------------- 1 | /* This query finds the overall readtier distribution - use this to spread the RSS Feeds out uniformly in fast, regular, or slow, for best performance */ 2 | SELECT '1' AS [Tier], Count(*) AS [Fast] 3 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 1 4 | SELECT '2' AS [Tier], Count(*) AS [Fast] 5 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 2 6 | SELECT '3' AS [Tier], Count(*) AS [Fast] 7 | FROM [RSSFunctionApp_RSSFeeds] 
WHERE Enabled = 1 AND ReadTier = 3 8 | SELECT '4' AS [Tier], Count(*) AS [Fast] 9 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 4 10 | SELECT '5' AS [Tier], Count(*) AS [Regular] 11 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 5 12 | SELECT '6' AS [Tier], Count(*) AS [Regular] 13 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 6 14 | SELECT '7' AS [Tier], Count(*) AS [Regular] 15 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 7 16 | SELECT '8' AS [Tier], Count(*) AS [Regular] 17 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 8 18 | SELECT '9' AS [Tier], Count(*) AS [Regular] 19 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 9 20 | SELECT '10' AS [Tier], Count(*) AS [Regular] 21 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 10 22 | SELECT '11' AS [Tier], Count(*) AS [Slow] 23 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 11 24 | SELECT '12' AS [Tier], Count(*) AS [Slow] 25 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 12 26 | SELECT '13' AS [Tier], Count(*) AS [Slow] 27 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 13 28 | SELECT '14' AS [Tier], Count(*) AS [Daily] 29 | FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1 AND ReadTier = 14 30 | -------------------------------------------------------------------------------- /RSSFeedAggregator/Queries/Reporter_FindMissing.sql: -------------------------------------------------------------------------------- 1 | /* This query finds if any reporters do not have any articles downloaded in the table - used to troubleshoot and fix feeds */ 2 | SELECT reporter 3 | FROM (SELECT FeedName FROM [RSSFunctionApp_RSSFeeds] WHERE Enabled = 1) R(reporter) 4 | EXCEPT 5 | SELECT DISTINCT [Reporter] 6 | FROM [NewsFeed_NewsArticles] ORDER BY [Reporter] ASC 7 | -------------------------------------------------------------------------------- 
/RSSFeedAggregator/Queries/Reporter_LastInsert.sql:
--------------------------------------------------------------------------------
/* Shows, for every reporter, the date of its most recent article in the table,
   oldest first - used to troubleshoot and fix feeds that have stopped delivering */
WITH ranked AS (
    SELECT reporter,
           datetime,
           ROW_NUMBER() OVER (PARTITION BY reporter ORDER BY datetime DESC) AS _rn
    FROM   NewsFeed_NewsArticles
)
SELECT   r.reporter,
         r.datetime
FROM     ranked AS r
WHERE    r._rn = 1
ORDER BY r.datetime ASC;
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Reporter_UselessArticleCount.sql:
--------------------------------------------------------------------------------
/* Counts, per reporter, the articles whose StarBotScore is 0 (non scored articles) -
   used to fine tune the article scoring in the TopArticles_StarbotKeywords table */
SELECT   [Reporter],
         COUNT(*) AS [counter]
FROM     [NewsFeed_NewsArticles]
WHERE    [StarBotScore] = 0
GROUP BY [Reporter]
ORDER BY [counter] DESC;
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/TopArticles_HighestScore.sql:
--------------------------------------------------------------------------------
1 | /* These queries list the highest scoring articles for a series of time windows relative to the current time */
2 | SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
3 | FROM [NewsFeed_NewsArticles] WHERE [DateTime] BETWEEN DATEADD(hour, -5, GETDATE()) AND DATEADD(hour, 7, GETDATE()) ORDER BY [StarBotScore] DESC
4 | SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
5 | FROM [NewsFeed_NewsArticles] WHERE [DateTime] BETWEEN DATEADD(hour, -17, GETDATE()) AND DATEADD(hour, -5, GETDATE()) ORDER BY [StarBotScore] DESC
6 | SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
7 | FROM [NewsFeed_NewsArticles] WHERE [DateTime] BETWEEN DATEADD(hour, -65, GETDATE()) AND DATEADD(hour, -17, GETDATE()) ORDER BY [StarBotScore] DESC
8 | SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
9 | FROM [NewsFeed_NewsArticles] WHERE [DateTime] BETWEEN DATEADD(hour, -161, GETDATE()) AND DATEADD(hour, -65, GETDATE()) ORDER BY [StarBotScore] DESC
10 | SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
11 | FROM [NewsFeed_NewsArticles] WHERE [DateTime] BETWEEN DATEADD(hour, -329, GETDATE()) AND DATEADD(hour, -161, GETDATE()) ORDER BY [StarBotScore] DESC
12 | SELECT TOP (50) [Reporter], [Article], [URL], [DateTime], [StarBotScore]
13 | FROM [NewsFeed_NewsArticles] WHERE [DateTime] BETWEEN DATEADD(hour, -24, GETDATE()) AND GETDATE() ORDER BY [StarBotScore] DESC
14 | /* This query finds the highest scoring articles in the table and uses a decay algorithm to give the most recent more precedence */
15 | SELECT TOP (50) [Article],[URL],[Reporter],[DateTime],CASE
16 | WHEN (datediff(hh,DateTime,GETDATE()) >= 102) THEN [StarBotScore] - 50
17 | WHEN (datediff(hh,DateTime,GETDATE()) >= 96) THEN [StarBotScore] - 40
18 | WHEN (datediff(hh,DateTime,GETDATE()) >= 90) THEN [StarBotScore] - 35
19 | WHEN (datediff(hh,DateTime,GETDATE()) >= 84) THEN [StarBotScore] - 30
20 | WHEN (datediff(hh,DateTime,GETDATE()) >= 78) THEN [StarBotScore] - 25
21 | WHEN (datediff(hh,DateTime,GETDATE()) >= 72) THEN [StarBotScore] - 20
22 | WHEN (datediff(hh,DateTime,GETDATE()) >= 66) THEN [StarBotScore] - 18
23 | WHEN (datediff(hh,DateTime,GETDATE()) >= 60) THEN [StarBotScore] - 16
24 | WHEN (datediff(hh,DateTime,GETDATE()) >= 54) THEN [StarBotScore] - 14
25 | WHEN (datediff(hh,DateTime,GETDATE()) >= 48) THEN [StarBotScore] - 12
26 | WHEN (datediff(hh,DateTime,GETDATE()) >= 42) THEN [StarBotScore] - 10
27 | WHEN (datediff(hh,DateTime,GETDATE()) >= 36) THEN [StarBotScore] - 8
28 | WHEN (datediff(hh,DateTime,GETDATE()) >= 30) THEN [StarBotScore] - 6
29 | WHEN (datediff(hh,DateTime,GETDATE()) >= 24) THEN [StarBotScore] - 4
30 | WHEN (datediff(hh,DateTime,GETDATE()) >= 18) THEN [StarBotScore] - 3
31 | WHEN (datediff(hh,DateTime,GETDATE()) >= 12) THEN [StarBotScore] - 2
32 |
WHEN (datediff(hh,DateTime,GETDATE()) >= 6) THEN [StarBotScore] - 1
33 | ELSE [StarBotScore]
34 | END AS [StarBotScore]
35 | FROM [NewsFeed_NewsArticles] ORDER BY StarBotScore DESC, DateTime DESC
36 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/TopArticles_MainPageScoring.sql:
--------------------------------------------------------------------------------
1 | /* This query finds articles that should appear on the main page due to having a score over a certain threshold */
2 | SELECT TOP (50) [ID],[Reporter],[Article],[URL],[StarBotScore]
3 | FROM [NewsFeed_NewsArticles] WHERE StarBotScore > 28 ORDER BY StarBotScore DESC
4 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/RSSFeedAggregator.cs:
--------------------------------------------------------------------------------
using System;
using Microsoft.Azure.WebJobs;
using System.Data.SqlClient;
using System.Net;
using System.ServiceModel.Syndication;
using System.Xml;
using System.Text.RegularExpressions;
using System.Data;
using System.Collections.Generic;
using System.Linq;

namespace RSSFeedAggregator
{
    /// <summary>
    /// Timer-triggered function that reads the RSS feeds registered in the database,
    /// stores the articles it has not seen before, and runs the periodic tag, star-bot
    /// and clean-up jobs. All failures are written to RSSFunctionApp_ErrorLogs.
    /// </summary>
    public class RSSFeedAggregator
    {
        // File extensions that qualify an "enclosure" link as the article image.
        private static readonly string[] ImageExtensions = { ".png", ".jpg", ".jpeg", ".gif" };

        // Upper bound on the number of tags written by GenerateTags.
        private const int MaxTagCount = 30;

        string _connectionString;
        string _userAgent;
        string _paywallRemover;
        int _connectionTimeout;
        int _ArticleKeepAge;
        bool _scoreArticles;
        bool _StarBotsActive;
        // Lower-cased keyword -> scoring tier (1..12), loaded by InitKeywordTable.
        Dictionary<string, int> _ArticleScoreKeywords;

        #region "Initialization"
        /// <summary>
        /// Sets the configuration values used by every other method and loads the
        /// keyword scoring table from the database. Must run before any feed is read.
        /// </summary>
        public void Initialize()
        {
            _connectionString = "ConnectionStringGoesHere";
            _userAgent = "UserAgentGoesHere";
            _connectionTimeout = 5000; //(milliseconds) 5 second timeout
            _paywallRemover = "https://PaywallRemoverWebsite/proxy?q=";
            _ArticleKeepAge = -14; //(days) fetch articles with an age of 2 weeks maximum
            _scoreArticles = false; //set to true to score each article based on keywords entered in the TopArticles_StarbotKeywords table
            _StarBotsActive = false; //set to true to have bots periodically star (favorite) the top scoring articles, requires TopArticles_StarbotKeywords table set up
            _ArticleScoreKeywords = InitKeywordTable();
        }
        #endregion

        #region "Main"
        /// <summary>
        /// Function entry point. The schedule "0 0 * * * *" fires once per hour; the hour of
        /// day then selects which read tiers and maintenance jobs run:
        /// tiers 1-4 rotate every 4 hours, 5-10 every 6 hours, 11-13 every 12 hours and
        /// tier 14 runs once a day at hour 5. Old articles are deleted at hour 1.
        /// </summary>
        /// <param name="myTimer">Timer information supplied by the trigger binding (not used).</param>
        [FunctionName("RSSFeedAggregator")]
        public void Run([TimerTrigger("0 0 * * * *")] TimerInfo myTimer)
        {
            try
            {
                Initialize();

                // Read the clock once: reading feeds can take a while, and re-reading the
                // time before each wheel could evaluate later wheels against a different hour.
                int hour = DateTime.Now.Hour;

                //Fast - every four hours time wheel (6 times per day)
                switch (hour % 4)
                {
                    case 3: ReadRssLists(1); break;
                    case 2: ReadRssLists(2); break;
                    case 1: ReadRssLists(3); break;
                    default: ReadRssLists(4); break;
                }
                StarBot(1, 1, -5, 7); // runs after whichever fast tier was read

                //Regular - every 6 hours time wheel (4 times per day)
                switch (hour % 6)
                {
                    case 5: ReadRssLists(5); break;
                    case 4: ReadRssLists(6); break;
                    case 3: ReadRssLists(7); break;
                    case 2: ReadRssLists(8); break;
                    case 1: ReadRssLists(9); break;
                    default: ReadRssLists(10); break;
                }

                //Slow - every 12 hours time wheel (2 times per day)
                switch (hour % 12)
                {
                    case 8:
                        ReadRssLists(11);
                        StarBot(2, 1, -17, 7);
                        break;
                    case 4:
                        ReadRssLists(12);
                        StarBot(3, 1, -161, 7);
                        break;
                    case 0:
                        ReadRssLists(13);
                        StarBot(2, 1, -17, 7);
                        DeleteOldTags();
                        GenerateTags();
                        break;
                }

                //Daily - every 24 hours (1 time per day)
                switch (hour)
                {
                    case 1:
                        DeleteOlderThan2Weeks();
                        break;
                    case 5:
                        ReadRssLists(14);
                        StarBot(4, 1, -329, 7);
                        break;
                }
            }
            catch (Exception e)
            {
                TryToDumpErrorToDB(e, "None", "None", "Main Method");
            }
        }
        #endregion

        #region "RSS Reading Subs"
        /// <summary>
        /// Calls the RSSFunctionApp stored procedure (@Switch = 2) for the given read tier and
        /// reads every feed it returns. A failure on one feed row is logged and does not stop
        /// the remaining rows.
        /// </summary>
        /// <param name="readtier">Read tier passed to the procedure as @ReadTier.</param>
        public void ReadRssLists(int readtier)
        {
            try
            {
                using SqlConnection conn = new(_connectionString);
                conn.Open();
                using SqlCommand comm = new("RSSFunctionApp", conn) { CommandType = CommandType.StoredProcedure };
                comm.Parameters.AddWithValue("@Switch", 2);
                comm.Parameters.AddWithValue("@ReadTier", readtier);
                using SqlDataReader rdr = comm.ExecuteReader();
                while (rdr.Read())
                {
                    try
                    {
                        // Column order: feed name, feed URL, compare-all, remove-paywall, ingestion-logging.
                        string thefeedname = rdr.GetString(0);
                        string thefeedurl = rdr.GetString(1);
                        bool compareall = rdr.GetBoolean(2);
                        bool removepaywall = rdr.GetBoolean(3);
                        bool ingestionlogging = rdr.GetBoolean(4);
                        ReadTheFeed(thefeedname, thefeedurl, compareall, removepaywall, ingestionlogging);
                    }
                    catch (Exception e)
                    {
                        TryToDumpErrorToDB(e, "None", "None", "ReadRSSLists");
                    }
                }
            }
            catch (Exception e)
            {
                TryToDumpErrorToDB(e, "None", "None", "ReadRSSLists");
            }
        }

        /// <summary>
        /// Downloads one feed, and inserts every item that has a usable link, is not already
        /// stored, and was published inside the keep range.
        /// </summary>
        /// <param name="feedname">Feed name; stored as the article's Reporter.</param>
        /// <param name="feedurl">URL of the RSS/Atom document.</param>
        /// <param name="compareall">True to detect duplicates against every stored article instead of only this feed's.</param>
        /// <param name="removepaywall">True to prefix each article link with the paywall remover URL.</param>
        /// <param name="ingestionlogging">True to write an ingestion log row when at least one item was uploaded or discarded.</param>
        public void ReadTheFeed(string feedname, string feedurl, bool compareall, bool removepaywall, bool ingestionlogging)
        {
            try
            {
                // Created inside the try so a malformed URL is logged with the feed name.
                HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(feedurl);
                myReq.UserAgent = _userAgent;
                myReq.Timeout = _connectionTimeout;

                using HttpWebResponse response = (HttpWebResponse)myReq.GetResponse();
                using XmlReader reader = XmlReader.Create(response.GetResponseStream());
                SyndicationFeed feed = SyndicationFeed.Load(reader);
                if (feed == null) return;
                try
                {
                    // Stored URLs are lower-cased while the paywall prefix may contain upper-case
                    // characters, so duplicates are matched case-insensitively. The set also
                    // replaces a linear list scan per item.
                    HashSet<string> knownLinks = new(InitLinkList(feedname, compareall), StringComparer.OrdinalIgnoreCase);
                    int uploadedArticle = 0;
                    int discardedArticle = 0;
                    foreach (SyndicationItem i in feed.Items)
                    {
                        try
                        {
                            DateTime pubDateTime;
                            try
                            {
                                pubDateTime = i.PublishDate.ToUniversalTime().DateTime;
                            }
                            catch (Exception)
                            {
                                // Unreadable publish date: count the item as discarded.
                                discardedArticle++;
                                continue;
                            }

                            ExtractLinks(i, out string linkuri, out string imageuri);
                            if (linkuri == "")
                            {
                                discardedArticle++;
                                continue;
                            }
                            if (removepaywall) linkuri = _paywallRemover + linkuri;
                            if (knownLinks.Contains(linkuri)) continue;
                            // Links of 451 characters or more are skipped, as are out-of-range dates.
                            if (!IsValidDateKeepRange(pubDateTime) || linkuri.Length >= 451) continue;

                            // Title itself may be absent, not only its text.
                            string titletext = i.Title?.Text ?? "";
                            if (titletext.Length > 250) titletext = titletext[..250];
                            string summarytext = i.Summary != null ? SummaryClipping(i.Summary.Text) : "";

                            int articlescore = 0;
                            if (_scoreArticles) articlescore = ArticleScoring(titletext.ToLower());
                            TryToDumpStoryToDB(feedname, titletext, summarytext, linkuri, pubDateTime, articlescore, imageuri);
                            knownLinks.Add(linkuri);
                            uploadedArticle++;
                        }
                        catch (Exception e)
                        {
                            TryToDumpErrorToDB(e, feedname, "None", "ReadTheFeed3");
                        }
                    }
                    if (ingestionlogging && !(uploadedArticle == 0 && discardedArticle == 0)) LogIngestedPercentage(feedname, uploadedArticle, discardedArticle);
                }
                catch (Exception e)
                {
                    TryToDumpErrorToDB(e, feedname, "None", "ReadTheFeed2");
                }
            }
            catch (Exception e)
            {
                TryToDumpErrorToDB(e, feedname, "None", "ReadTheFeed1");
            }
        }

        // Picks the article link (lower-cased) and, when present, an image enclosure link from a feed item.
        private static void ExtractLinks(SyndicationItem item, out string linkuri, out string imageuri)
        {
            linkuri = "";
            imageuri = "";

            // A single link is taken as the article link regardless of its relationship type.
            if (item.Links.Count == 1)
            {
                linkuri = item.Links[0].Uri.ToString().ToLower();
                return;
            }

            foreach (SyndicationLink x in item.Links)
            {
                string thislink = x.Uri.ToString();
                if (x.RelationshipType == null) continue;

                if (x.RelationshipType == "enclosure" && x.MediaType != null)
                {
                    // First image enclosure with a recognised extension wins.
                    if (imageuri == "" && x.MediaType.Contains("image") && ImageExtensions.Any(ext => thislink.ToLower().Contains(ext)))
                    {
                        imageuri = thislink;
                    }
                }
                else if (linkuri == "")
                {
                    // First non-enclosure link wins.
                    linkuri = thislink.ToLower();
                }
            }
        }

        /// <summary>
        /// Tells whether a UTC timestamp lies between now and the configured keep age.
        /// </summary>
        /// <param name="dateTime">Timestamp to test, compared against UTC now.</param>
        /// <returns>False for null, for future timestamps and for timestamps older than the keep age.</returns>
        public bool IsValidDateKeepRange(DateTime? dateTime)
        {
            if (dateTime == null) return false;

            // One clock read so both bounds derive from the same instant.
            DateTime maxValue = DateTime.UtcNow;
            DateTime minValue = maxValue.AddDays(_ArticleKeepAge);

            return minValue <= dateTime.Value && maxValue >= dateTime.Value;
        }

        /// <summary>
        /// Strips markup tags, collapses whitespace runs to a single space and truncates to 2000 characters.
        /// </summary>
        /// <param name="summary">Raw summary text; null or empty yields an empty string.</param>
        /// <returns>The cleaned summary, at most 2000 characters long.</returns>
        public static string SummaryClipping(string summary)
        {
            if (string.IsNullOrEmpty(summary)) return "";
            summary = Regex.Replace(summary, "<[^>]*>", string.Empty);
            summary = Regex.Replace(summary, @"\s+", " ");
            if (summary.Length > 2000) summary = summary[..2000];
            return summary;
        }

        /// <summary>
        /// Scores a lower-cased title by summing the weights of the keywords it contains.
        /// Tier 4 keywords count at most 4 times and tier 6 keywords at most 8 times.
        /// </summary>
        /// <param name="articleTitle">Title text; every character other than a-z and space is removed before matching.</param>
        /// <returns>The summed score; negative tiers (7-12) subtract.</returns>
        public int ArticleScoring(string articleTitle)
        {
            int articlescore = 0;
            int maxtierfour = 4;
            int maxtiersix = 8;
            articleTitle = Regex.Replace(articleTitle, "[^a-z ]", string.Empty);
            foreach (string word in articleTitle.Split(" "))
            {
                if (!_ArticleScoreKeywords.TryGetValue(word, out int tier)) continue;
                switch (tier)
                {
                    case 1: articlescore += 13; break;
                    case 2: articlescore += 11; break;
                    case 3: articlescore += 9; break;
                    case 4:
                        if (maxtierfour > 0)
                        {
                            maxtierfour -= 1;
                            articlescore += 5;
                        }
                        break;
                    case 5: articlescore += 3; break;
                    case 6:
                        if (maxtiersix > 0)
                        {
                            maxtiersix -= 1;
                            articlescore += 1;
                        }
                        break;
                    case 7: articlescore -= 1; break;
                    case 8: articlescore -= 3; break;
                    case 9: articlescore -= 5; break;
                    case 10: articlescore -= 9; break;
                    case 11: articlescore -= 11; break;
                    case 12: articlescore -= 13; break;
                }
            }
            return articlescore;
        }

        /// <summary>
        /// Loads the lower-cased URLs of stored articles, newest first, for duplicate detection.
        /// </summary>
        /// <param name="feedname">Reporter whose articles are loaded when <paramref name="compareall"/> is false.</param>
        /// <param name="compareall">True to load the URLs of every stored article.</param>
        /// <returns>The list of lower-cased URLs.</returns>
        public List<string> InitLinkList(string feedname, bool compareall)
        {
            List<string> linkList = new();
            string urlselectquery = compareall
                ? "SELECT URL FROM NewsFeed_NewsArticles ORDER BY DateTime DESC"
                : "SELECT URL FROM NewsFeed_NewsArticles WHERE Reporter = @Reporter ORDER BY DateTime DESC";

            using SqlConnection conn = new(_connectionString);
            conn.Open();
            using SqlCommand comm = new(urlselectquery, conn);
            if (!compareall) { comm.Parameters.AddWithValue("@Reporter", feedname); }
            using SqlDataReader rdr = comm.ExecuteReader();
            while (rdr.Read())
            {
                linkList.Add(rdr.GetString(0).ToLower());
            }
            return linkList;
        }

        /// <summary>
        /// Loads the keyword scoring table through RSSFunctionApp (@Switch = 1).
        /// </summary>
        /// <returns>Map of lower-cased keyword to its integer tier; the first row wins when a keyword repeats.</returns>
        public Dictionary<string, int> InitKeywordTable()
        {
            Dictionary<string, int> keywordTable = new();
            using SqlConnection conn = new(_connectionString);
            conn.Open();
            using SqlCommand comm = new("RSSFunctionApp", conn) { CommandType = CommandType.StoredProcedure };
            comm.Parameters.AddWithValue("@Switch", 1);
            using SqlDataReader rdr = comm.ExecuteReader();
            while (rdr.Read())
            {
                // TryAdd instead of Add: keywords that collide after lower-casing would
                // otherwise throw and abort the whole run from Initialize.
                keywordTable.TryAdd(rdr.GetString(0).ToLower(), rdr.GetInt32(1));
            }
            return keywordTable;
        }
        #endregion

        #region "Database Inserts"
        /// <summary>
        /// Inserts one article row into NewsFeed_NewsArticles. A failure is logged, not thrown.
        /// </summary>
        /// <param name="feedName">Stored as Reporter.</param>
        /// <param name="title">Stored as Article.</param>
        /// <param name="summary">Stored as Summary.</param>
        /// <param name="link">Stored as URL.</param>
        /// <param name="pubDate">Stored as DateTime.</param>
        /// <param name="articleScore">Stored as StarBotScore.</param>
        /// <param name="image">Stored as Image.</param>
        public void TryToDumpStoryToDB(string feedName, string title, string summary, string link, DateTime pubDate, int articleScore, string image)
        {
            using SqlConnection conn = new(_connectionString);
            using SqlCommand comm = new("INSERT INTO NewsFeed_NewsArticles (Reporter, Article, URL, DateTime, Summary, StarBotScore, Image) VALUES (@Reporter, @Article, @URL, @DateTime, @Summary, @StarBotScore, @Image)", conn);
            comm.CommandType = CommandType.Text;
            comm.Parameters.AddWithValue("@Reporter", feedName);
            comm.Parameters.AddWithValue("@Article", title);
            comm.Parameters.AddWithValue("@Summary", summary);
            comm.Parameters.AddWithValue("@URL", link);
            comm.Parameters.AddWithValue("@DateTime", pubDate);
            comm.Parameters.AddWithValue("@StarBotScore", articleScore);
            comm.Parameters.AddWithValue("@Image", image);
            try
            {
                conn.Open();
                comm.ExecuteNonQuery();
            }
            catch (Exception e)
            {
                TryToDumpErrorToDB(e, feedName, title, "TryToDumpStoryToDB");
            }
        }

        /// <summary>
        /// Writes an error row to RSSFunctionApp_ErrorLogs. Never throws: if the database write
        /// itself fails, both errors are written to standard error instead.
        /// </summary>
        /// <param name="eMessage">The exception being reported.</param>
        /// <param name="feedName">Feed being processed, or a placeholder such as "None".</param>
        /// <param name="articleTitle">Article being processed, or a placeholder; truncated to 250 characters.</param>
        /// <param name="subroutine">Label identifying where the error was caught.</param>
        public void TryToDumpErrorToDB(Exception eMessage, string feedName, string articleTitle, string subroutine)
        {
            try
            {
                // AddWithValue omits a parameter whose value is null, which makes the INSERT fail,
                // so every nullable text value falls back to an empty string.
                string errormessagetext = eMessage?.Message ?? "";
                string articletext = articleTitle ?? "";
                string sourcetext = eMessage?.StackTrace ?? ""; // StackTrace is null for an exception that was never thrown
                string typetext = eMessage?.GetType().Name ?? "";
                if (errormessagetext.Length > 2000) errormessagetext = errormessagetext[..2000];
                if (articletext.Length > 250) articletext = articletext[..250];
                if (sourcetext.Length > 5000) sourcetext = sourcetext[..5000];
                if (typetext.Length > 2000) typetext = typetext[..2000];
                using SqlConnection conn = new(_connectionString);
                using SqlCommand comm = new("INSERT INTO RSSFunctionApp_ErrorLogs (Type, Message, Source, FeedName, ArticleTitle, Subroutine, DateTime, Handled) VALUES (@Type, @Message, @Source, @FeedName, @ArticleTitle, @Subroutine, GETDATE(), 0)", conn);
                comm.CommandType = CommandType.Text;
                comm.Parameters.AddWithValue("@Type", typetext);
                comm.Parameters.AddWithValue("@Message", errormessagetext);
                comm.Parameters.AddWithValue("@Source", sourcetext);
                comm.Parameters.AddWithValue("@FeedName", feedName ?? "");
                comm.Parameters.AddWithValue("@ArticleTitle", articletext);
                comm.Parameters.AddWithValue("@Subroutine", subroutine ?? "");
                conn.Open();
                comm.ExecuteNonQuery();
            }
            catch (Exception loggingError)
            {
                //Error Writing To Database - Write Error Locally
                Console.Error.WriteLine("TryToDumpErrorToDB failed in " + subroutine + ": " + loggingError.Message + " | original error: " + eMessage?.Message);
            }
        }

        /// <summary>
        /// Writes one ingestion log row with the uploaded/discarded counts and the uploaded percentage.
        /// Nothing is written when both counts are zero. A failure is logged, not thrown.
        /// </summary>
        /// <param name="feedName">Feed the counts belong to.</param>
        /// <param name="uploadedCount">Number of items inserted.</param>
        /// <param name="discardedCount">Number of items discarded.</param>
        public void LogIngestedPercentage(string feedName, int uploadedCount, int discardedCount)
        {
            int totalCount = uploadedCount + discardedCount;
            if (totalCount <= 0) return; // avoids 0/0 = NaN, which cannot be stored
            float uploadedPercentage = (float)uploadedCount / (float)totalCount * 100;
            using SqlConnection conn = new(_connectionString);
            using SqlCommand comm = new("INSERT INTO RSSFunctionApp_IngestionLogs (FeedName, UploadedCount, DiscardedCount, TotalCount, UploadedPercentage, DateTime) VALUES (@FeedName, @UploadedCount, @DiscardedCount, @TotalCount, @UploadedPercentage, GETDATE())", conn);
            comm.CommandType = CommandType.Text;
            comm.Parameters.AddWithValue("@FeedName", feedName);
            comm.Parameters.AddWithValue("@UploadedCount", uploadedCount);
            comm.Parameters.AddWithValue("@DiscardedCount", discardedCount);
            comm.Parameters.AddWithValue("@TotalCount", totalCount);
            comm.Parameters.AddWithValue("@UploadedPercentage", uploadedPercentage);
            try
            {
                conn.Open();
                comm.ExecuteNonQuery();
            }
            catch (Exception e)
            {
                TryToDumpErrorToDB(e, feedName, "", "LogIngestedPercentage");
            }
        }

        /// <summary>
        /// Rebuilds the trending tags: reads the banned words (@Switch = 3) and the article text
        /// rows (@Switch = 4), counts the first non-banned word of each row, and inserts the
        /// most frequent words (up to 30) into NewsFeed_Tags. A failure is logged, not thrown.
        /// </summary>
        public void GenerateTags()
        {
            try
            {
                Dictionary<string, int> tagTable = new();
                HashSet<string> bannedTrends = new();

                // Read Banned Trends
                using (SqlConnection conn1 = new(_connectionString))
                {
                    conn1.Open();
                    using SqlCommand comm1 = new("RSSFunctionApp", conn1) { CommandType = CommandType.StoredProcedure };
                    comm1.Parameters.AddWithValue("@Switch", 3);
                    using SqlDataReader rdr1 = comm1.ExecuteReader();
                    while (rdr1.Read())
                    {
                        bannedTrends.Add(rdr1.GetString(0).ToLower());
                    }
                }

                // Read Articles
                using (SqlConnection conn2 = new(_connectionString))
                {
                    conn2.Open();
                    using SqlCommand comm2 = new("RSSFunctionApp", conn2) { CommandType = CommandType.StoredProcedure };
                    comm2.Parameters.AddWithValue("@Switch", 4);
                    using SqlDataReader rdr2 = comm2.ExecuteReader();
                    while (rdr2.Read())
                    {
                        string rawstring = rdr2.GetString(0).ToLower();
                        rawstring = Regex.Replace(rawstring, "[^a-z. ]", string.Empty);
                        foreach (string splitstring in rawstring.Split(" "))
                        {
                            // Empty tokens come from consecutive spaces left by the character filter.
                            if (splitstring.Length == 0 || bannedTrends.Contains(splitstring)) continue;
                            tagTable[splitstring] = tagTable.TryGetValue(splitstring, out int seen) ? seen + 1 : 1;
                            // NOTE(review): only the first eligible word of each row is counted, as in
                            // the original code - confirm this is intended against RSSFunctionApp @Switch = 4.
                            break;
                        }
                    }
                }

                // Sort once and take what is available; asking for 30 entries when fewer exist
                // used to throw after DeleteOldTags had already emptied the table.
                List<string> topTags = tagTable
                    .OrderByDescending(entry => entry.Value)
                    .Take(MaxTagCount)
                    .Select(entry => entry.Key)
                    .ToList();
                if (topTags.Count == 0) return;

                // Only generated parameter names are concatenated; tag values stay parameterized.
                string valuesClause = string.Join(",", Enumerable.Range(0, topTags.Count).Select(n => "(@Tag" + n + ")"));
                using SqlConnection conn3 = new(_connectionString);
                using SqlCommand comm3 = new("INSERT INTO NewsFeed_Tags (Tag) VALUES " + valuesClause, conn3);
                comm3.CommandType = CommandType.Text;
                for (int i = 0; i < topTags.Count; i++)
                {
                    comm3.Parameters.AddWithValue("@Tag" + i, topTags[i]);
                }
                conn3.Open();
                comm3.ExecuteNonQuery();
            }
            catch (Exception e)
            {
                TryToDumpErrorToDB(e, "None", "None", "GenerateTags");
            }
        }

        /// <summary>
        /// When star bots are enabled, asks RSSFunctionApp (@Switch = 7) for article IDs and stars
        /// each one through NewsFeed_AddStarredArticle under the name "StarBot" + botNumber.
        /// A failure is logged, not thrown.
        /// </summary>
        /// <param name="botNumber">Bot identifier appended to "StarBot".</param>
        /// <param name="starCount">Passed to the procedure as @TopCount.</param>
        /// <param name="fromRange">Passed to the procedure as @FromRange.</param>
        /// <param name="toRange">Passed to the procedure as @ToRange.</param>
        public void StarBot(int botNumber, int starCount, int fromRange, int toRange)
        {
            if (!_StarBotsActive) { return; }
            try
            {
                string botName = "StarBot" + botNumber;
                using SqlConnection conn1 = new(_connectionString);
                conn1.Open();
                using SqlCommand comm1 = new("RSSFunctionApp", conn1) { CommandType = CommandType.StoredProcedure };
                comm1.Parameters.AddWithValue("@Switch", 7);
                comm1.Parameters.AddWithValue("@FromRange", fromRange);
                comm1.Parameters.AddWithValue("@ToRange", toRange);
                comm1.Parameters.AddWithValue("@BotNumber", botName);
                comm1.Parameters.AddWithValue("@TopCount", starCount);
                using SqlDataReader rdr1 = comm1.ExecuteReader();
                while (rdr1.Read())
                {
                    // A second connection is used because the first is busy with the open reader.
                    using SqlConnection conn2 = new(_connectionString);
                    using SqlCommand comm2 = new("NewsFeed_AddStarredArticle", conn2) { CommandType = CommandType.StoredProcedure };
                    comm2.Parameters.AddWithValue("@IPAddress", botName);
                    comm2.Parameters.AddWithValue("@NewsArticleID", rdr1.GetInt32(0));
                    conn2.Open();
                    comm2.ExecuteNonQuery();
                }
            }
            catch (Exception e)
            {
                TryToDumpErrorToDB(e, "None", "None", "StarBot");
            }
        }
        #endregion

        #region "Database Deletes"
        /// <summary>
        /// Runs RSSFunctionApp with @Switch = 5. A failure is logged, not thrown.
        /// </summary>
        public void DeleteOlderThan2Weeks()
        {
            RunMaintenanceSwitch(5, "DeleteOlderThan2Weeks");
        }

        /// <summary>
        /// Runs RSSFunctionApp with @Switch = 6. A failure is logged, not thrown.
        /// </summary>
        public void DeleteOldTags()
        {
            RunMaintenanceSwitch(6, "DeleteOldTags");
        }

        // Executes RSSFunctionApp with only @Switch set and logs any failure under the given label.
        private void RunMaintenanceSwitch(int switchValue, string subroutine)
        {
            try
            {
                using SqlConnection conn = new(_connectionString);
                using SqlCommand comm = new("RSSFunctionApp", conn) { CommandType = CommandType.StoredProcedure };
                comm.Parameters.AddWithValue("@Switch", switchValue);
                conn.Open();
                comm.ExecuteNonQuery();
            }
            catch (Exception e)
            {
                TryToDumpErrorToDB(e, "", "", subroutine);
            }
        }
        #endregion
    }
}
--------------------------------------------------------------------------------
/RSSFeedAggregator/RSSFeedAggregator.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | net6.0
4 | v4
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | PreserveNewest
14 |
15 |
16 | PreserveNewest
17 | Never
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
/* NewsFeed: lookup queries selected by @Switch.
     1-5  most starred articles for successive time windows relative to GETDATE()
     6    tag list
     7    number of searches logged for @UserIP in the last 60 minutes
     8    number of stars given by @UserIP in the last 24 hours
     9    articles starred by @UserIP
     10   IDs of the stars given by @UserIP in the last 24 hours
   Any other @Switch value returns nothing. */
CREATE PROCEDURE [NewsFeed]
    @Switch INT = NULL,
    @UserIP NVARCHAR(25) = NULL
AS
BEGIN
    SET NOCOUNT ON

    /* Top Articles - 12H (published between 5 hours ago and 7 hours ahead) */
    IF @Switch = 1
    BEGIN
        SELECT TOP 5 *
        FROM (
            SELECT s.NewsArticleID AS [ID], COUNT(*) AS [StarCount], n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
            FROM NewsFeed_StarredArticles s
            LEFT JOIN NewsFeed_NewsArticles n ON n.ID = s.NewsArticleID
            GROUP BY s.NewsArticleID, n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
        ) t
        WHERE DateTime BETWEEN DATEADD(hour, -5, GETDATE()) AND DATEADD(hour, 7, GETDATE())
        ORDER BY StarCount DESC, DateTime DESC
    END
    /* Top Articles - 24H (published between 17 and 5 hours ago) */
    ELSE IF @Switch = 2
    BEGIN
        SELECT TOP 5 *
        FROM (
            SELECT s.NewsArticleID AS [ID], COUNT(*) AS [StarCount], n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
            FROM NewsFeed_StarredArticles s
            LEFT JOIN NewsFeed_NewsArticles n ON n.ID = s.NewsArticleID
            GROUP BY s.NewsArticleID, n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
        ) t
        WHERE DateTime BETWEEN DATEADD(hour, -17, GETDATE()) AND DATEADD(hour, -5, GETDATE())
        ORDER BY StarCount DESC, DateTime DESC
    END
    /* Top Articles - 3D (published between 65 and 17 hours ago) */
    ELSE IF @Switch = 3
    BEGIN
        SELECT TOP 10 *
        FROM (
            SELECT s.NewsArticleID AS [ID], COUNT(*) AS [StarCount], n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
            FROM NewsFeed_StarredArticles s
            LEFT JOIN NewsFeed_NewsArticles n ON n.ID = s.NewsArticleID
            GROUP BY s.NewsArticleID, n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
        ) t
        WHERE DateTime BETWEEN DATEADD(hour, -65, GETDATE()) AND DATEADD(hour, -17, GETDATE())
        ORDER BY StarCount DESC, DateTime DESC
    END
    /* Top Articles - 7D (published between 161 and 65 hours ago) */
    ELSE IF @Switch = 4
    BEGIN
        SELECT TOP 10 *
        FROM (
            SELECT s.NewsArticleID AS [ID], COUNT(*) AS [StarCount], n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
            FROM NewsFeed_StarredArticles s
            LEFT JOIN NewsFeed_NewsArticles n ON n.ID = s.NewsArticleID
            GROUP BY s.NewsArticleID, n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
        ) t
        WHERE DateTime BETWEEN DATEADD(hour, -161, GETDATE()) AND DATEADD(hour, -65, GETDATE())
        ORDER BY StarCount DESC, DateTime DESC
    END
    /* Top Articles - 14D (published between 329 and 161 hours ago) */
    ELSE IF @Switch = 5
    BEGIN
        SELECT TOP 20 *
        FROM (
            SELECT s.NewsArticleID AS [ID], COUNT(*) AS [StarCount], n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
            FROM NewsFeed_StarredArticles s
            LEFT JOIN NewsFeed_NewsArticles n ON n.ID = s.NewsArticleID
            GROUP BY s.NewsArticleID, n.Reporter, n.Article, n.Summary, n.URL, n.DateTime
        ) t
        WHERE DateTime BETWEEN DATEADD(hour, -329, GETDATE()) AND DATEADD(hour, -161, GETDATE())
        ORDER BY StarCount DESC, DateTime DESC
    END
    /* Display Trends */
    ELSE IF @Switch = 6
    BEGIN
        SELECT Tag
        FROM NewsFeed_Tags
        ORDER BY Tag ASC
    END
    /* Check Search Limit */
    ELSE IF @Switch = 7
    BEGIN
        SELECT COUNT(*) AS [Count]
        FROM (
            SELECT [IPAddress], [DateTime]
            FROM [NewsFeed_SearchHistory]
            WHERE [IPAddress] = @UserIP
              AND (DATEDIFF(mi, DateTime, GETDATE()) <= 60)
        ) t
    END
    /* Check Star Limit */
    ELSE IF @Switch = 8
    BEGIN
        SELECT COUNT(*) AS [Count]
        FROM NewsFeed_StarredArticles
        WHERE UserIPAddress = @UserIP
          AND (DATEDIFF(hh, DateTime, GETDATE()) <= 24)
    END
    /* Top Articles - Favorite Articles */
    ELSE IF @Switch = 9
    BEGIN
        SELECT n.ID, n.Reporter, n.Article, n.URL, n.DateTime, n.Summary, s.UserIPAddress
        FROM NewsFeed_NewsArticles n
        LEFT JOIN NewsFeed_StarredArticles s ON n.ID = s.NewsArticleID
        WHERE s.UserIPAddress = @UserIP
        ORDER BY n.DateTime DESC
    END
    /* Display Available Star Count */
    ELSE IF @Switch = 10
    BEGIN
        SELECT ID
        FROM NewsFeed_StarredArticles
        WHERE UserIPAddress = @UserIP
          AND (DATEDIFF(hh, DateTime, GETDATE()) <= 24)
    END
END
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_AddComment.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_AddComment.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_AddStarredArticle.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_AddStarredArticle.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_DeleteBotStarred.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_DeleteBotStarred.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_DisplayComments.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_DisplayComments.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_LogSearch.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_LogSearch.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_MainFeed.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_MainFeed.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/RSSFunctionApp.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/RSSFunctionApp.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_ArticleComments.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- One row per comment, keyed by a surrogate identity.
-- NOTE(review): NewsArticleID presumably refers to NewsFeed_NewsArticles.ID; no
-- foreign key is declared here - confirm against the comment procedures.
CREATE TABLE [NewsFeed_ArticleComments](
    [ID]            INT IDENTITY(1,1) NOT NULL,
    [NewsArticleID] INT               NOT NULL,
    [UserIPAddress] NVARCHAR(25)      NOT NULL,
    [Comment]       NVARCHAR(200)     NULL,
    [DateTime]      DATETIME          NOT NULL,
    [UserName]      NVARCHAR(15)      NULL,
    CONSTRAINT [PK_NewsFeed_ArticleComments] PRIMARY KEY CLUSTERED
    (
        [ID] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_BannedTrends.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- Set of trend strings; the trend text itself is the clustered primary key,
-- so each trend can appear only once.
CREATE TABLE [NewsFeed_BannedTrends](
    [Trend] NVARCHAR(25) NOT NULL,
    CONSTRAINT [PK_NewsFeed_BannedTrends] PRIMARY KEY CLUSTERED
    (
        [Trend] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_NewsArticles.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- One row per article. The clustered primary key is the article URL, so the same
-- URL can be stored only once.
CREATE TABLE [NewsFeed_NewsArticles](
    [ID]           INT IDENTITY(1,1) NOT NULL,
    [Reporter]     NVARCHAR(100)     NOT NULL,
    [Article]      NVARCHAR(250)     NOT NULL,
    [URL]          NVARCHAR(450)     NOT NULL,
    [DateTime]     DATETIME          NOT NULL,
    [Summary]      NVARCHAR(2000)    NULL,
    [StarBotScore] INT               NULL,
    [Image]        NVARCHAR(450)     NULL,
    CONSTRAINT [PK_NewsFeed_NewsArticles] PRIMARY KEY CLUSTERED
    (
        [URL] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY],
    -- ID is what NewsFeed_StarredArticles.NewsArticleID is joined against (see the
    -- NewsFeed procedure). IDENTITY alone neither guarantees uniqueness nor creates
    -- an index, so enforce both explicitly.
    CONSTRAINT [UQ_NewsFeed_NewsArticles_ID] UNIQUE NONCLUSTERED
    (
        [ID] ASC
    ) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_SearchHistory.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- One row per logged search. IPAddress and DateTime are the columns read by the
-- NewsFeed procedure's search-limit check (@Switch = 7).
CREATE TABLE [NewsFeed_SearchHistory](
    [ID]         INT IDENTITY(1,1) NOT NULL,
    [IPAddress]  NVARCHAR(100)     NOT NULL,
    [SearchTerm] NVARCHAR(100)     NOT NULL,
    [DateTime]   DATETIME          NOT NULL,
    CONSTRAINT [PK_NewsFeed_SearchHistory] PRIMARY KEY CLUSTERED
    (
        [ID] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_StarredArticles.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- One row per star. NewsArticleID is joined to NewsFeed_NewsArticles.ID by the
-- NewsFeed procedure; UserIPAddress and DateTime are read by its per-IP star checks.
CREATE TABLE [NewsFeed_StarredArticles](
    [ID]            INT IDENTITY(1,1) NOT NULL,
    [NewsArticleID] INT               NOT NULL,
    [UserIPAddress] NVARCHAR(25)      NOT NULL,
    [DateTime]      DATETIME          NOT NULL,
    CONSTRAINT [PK_NewsFeed_StarredArticles] PRIMARY KEY CLUSTERED
    (
        [ID] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_Tags.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- Set of tag strings, keyed by the tag text; listed alphabetically by the
-- NewsFeed procedure (@Switch = 6).
CREATE TABLE [NewsFeed_Tags](
    [Tag] NVARCHAR(25) NOT NULL,
    CONSTRAINT [PK_NewsFeed_Tags] PRIMARY KEY CLUSTERED
    (
        [Tag] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/RSSFunctionApp_ErrorLogs.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- One row per logged error. Source is NVARCHAR(MAX), hence the TEXTIMAGE_ON clause.
CREATE TABLE [RSSFunctionApp_ErrorLogs](
    [ID]           INT IDENTITY(1,1) NOT NULL,
    [Type]         NVARCHAR(2000)    NOT NULL,
    [Message]      NVARCHAR(2000)    NOT NULL,
    [Source]       NVARCHAR(MAX)     NOT NULL,
    [FeedName]     NVARCHAR(100)     NOT NULL,
    [ArticleTitle] NVARCHAR(250)     NULL,
    [Subroutine]   NVARCHAR(100)     NOT NULL,
    [DateTime]     DATETIME          NOT NULL,
    [Handled]      INT               NOT NULL,
    CONSTRAINT [PK_RSSFunctionApp_ErrorLogs] PRIMARY KEY CLUSTERED
    (
        [ID] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/RSSFunctionApp_IngestionLogs.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- One row per ingestion log entry: per-feed uploaded / discarded / total counts
-- and the uploaded percentage, with a timestamp.
CREATE TABLE [RSSFunctionApp_IngestionLogs](
    [ID]                 INT IDENTITY(1,1) NOT NULL,
    [FeedName]           NVARCHAR(100)     NOT NULL,
    [UploadedCount]      INT               NOT NULL,
    [DiscardedCount]     INT               NOT NULL,
    [TotalCount]         INT               NOT NULL,
    [UploadedPercentage] FLOAT             NOT NULL,
    [DateTime]           DATETIME          NOT NULL,
    CONSTRAINT [PK_RSSFunctionApp_IngestionLogs] PRIMARY KEY CLUSTERED
    (
        [ID] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/RSSFunctionApp_RSSFeeds.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- Feed registry. The clustered primary key is the feed URL (not the identity ID),
-- so a given URL can be registered only once.
CREATE TABLE [RSSFunctionApp_RSSFeeds](
    [ID]               INT IDENTITY(1,1) NOT NULL,
    [FeedName]         NVARCHAR(50)      NOT NULL,
    [URL]              NVARCHAR(100)     NOT NULL,
    [ReadTier]         INT               NOT NULL,
    [Enabled]          INT               NOT NULL,
    [Category]         NVARCHAR(5)       NULL,
    [CompareAll]       BIT               NULL,
    [RemovePaywall]    BIT               NULL,
    [IngestionLogging] BIT               NULL,
    CONSTRAINT [PK_RSSFunctionApp_RSSFeeds] PRIMARY KEY CLUSTERED
    (
        [URL] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/TopArticles_StarbotKeywords.sql:
--------------------------------------------------------------------------------
USE [Database]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- Keyword list keyed by the keyword text, each with an integer tier.
CREATE TABLE [TopArticles_StarbotKeywords](
    [Keyword] NVARCHAR(25) NOT NULL,
    [Tier]    INT          NOT NULL,
    CONSTRAINT [PK_TopArticles_StarbotKeywords] PRIMARY KEY CLUSTERED
    (
        [Keyword] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

--------------------------------------------------------------------------------
/RSSFeedAggregator/host.json:
--------------------------------------------------------------------------------
{
  "version": "2.0",
  "logging": {
    "applicationInsights": {
      "samplingSettings": {
        "isEnabled": true,
        "excludedTypes": "Request"
      },
      "enableLiveMetricsFilters": true
    }
  }
}
--------------------------------------------------------------------------------