├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── RSSFeedAggregator.sln
└── RSSFeedAggregator
├── .gitignore
├── Properties
├── launchSettings.json
├── serviceDependencies.json
└── serviceDependencies.local.json
├── Queries
├── Articles_FindDuplicates.sql
├── Articles_FindUseless.sql
├── Articles_Latest.sql
├── Counter_ArticleDuplicates.sql
├── Counter_Reporter.sql
├── Errors_Unhandled.sql
├── Ingested_Over.sql
├── Ingested_Under.sql
├── RSSFeeds_Insert.sql
├── RSSFeeds_TurnedOff.sql
├── RSSFeeds_TurnedOn.sql
├── ReadTier_Distribution.sql
├── Reporter_FindMissing.sql
├── Reporter_LastInsert.sql
├── Reporter_UselessArticleCount.sql
├── TopArticles_HighestScore.sql
└── TopArticles_MainPageScoring.sql
├── RSSFeedAggregator.cs
├── RSSFeedAggregator.csproj
├── StoredProcedures
├── NewsFeed.sql
├── NewsFeed_AddComment.sql
├── NewsFeed_AddStarredArticle.sql
├── NewsFeed_DeleteBotStarred.sql
├── NewsFeed_DisplayComments.sql
├── NewsFeed_LogSearch.sql
├── NewsFeed_MainFeed.sql
└── RSSFunctionApp.sql
├── TableSchemas
├── NewsFeed_ArticleComments.sql
├── NewsFeed_BannedTrends.sql
├── NewsFeed_NewsArticles.sql
├── NewsFeed_SearchHistory.sql
├── NewsFeed_StarredArticles.sql
├── NewsFeed_Tags.sql
├── RSSFunctionApp_ErrorLogs.sql
├── RSSFunctionApp_IngestionLogs.sql
├── RSSFunctionApp_RSSFeeds.sql
└── TopArticles_StarbotKeywords.sql
└── host.json
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Oo]ut/
33 | [Ll]og/
34 | [Ll]ogs/
35 |
36 | # Visual Studio 2015/2017 cache/options directory
37 | .vs/
38 | # Uncomment if you have tasks that create the project's static files in wwwroot
39 | #wwwroot/
40 |
41 | # Visual Studio 2017 auto generated files
42 | Generated\ Files/
43 |
44 | # MSTest test Results
45 | [Tt]est[Rr]esult*/
46 | [Bb]uild[Ll]og.*
47 |
48 | # NUnit
49 | *.VisualState.xml
50 | TestResult.xml
51 | nunit-*.xml
52 |
53 | # Build Results of an ATL Project
54 | [Dd]ebugPS/
55 | [Rr]eleasePS/
56 | dlldata.c
57 |
58 | # Benchmark Results
59 | BenchmarkDotNet.Artifacts/
60 |
61 | # .NET Core
62 | project.lock.json
63 | project.fragment.lock.json
64 | artifacts/
65 |
66 | # ASP.NET Scaffolding
67 | ScaffoldingReadMe.txt
68 |
69 | # StyleCop
70 | StyleCopReport.xml
71 |
72 | # Files built by Visual Studio
73 | *_i.c
74 | *_p.c
75 | *_h.h
76 | *.ilk
77 | *.meta
78 | *.obj
79 | *.iobj
80 | *.pch
81 | *.pdb
82 | *.ipdb
83 | *.pgc
84 | *.pgd
85 | *.rsp
86 | *.sbr
87 | *.tlb
88 | *.tli
89 | *.tlh
90 | *.tmp
91 | *.tmp_proj
92 | *_wpftmp.csproj
93 | *.log
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio LightSwitch build output
298 | **/*.HTMLClient/GeneratedArtifacts
299 | **/*.DesktopClient/GeneratedArtifacts
300 | **/*.DesktopClient/ModelManifest.xml
301 | **/*.Server/GeneratedArtifacts
302 | **/*.Server/ModelManifest.xml
303 | _Pvt_Extensions
304 |
305 | # Paket dependency manager
306 | .paket/paket.exe
307 | paket-files/
308 |
309 | # FAKE - F# Make
310 | .fake/
311 |
312 | # CodeRush personal settings
313 | .cr/personal
314 |
315 | # Python Tools for Visual Studio (PTVS)
316 | __pycache__/
317 | *.pyc
318 |
319 | # Cake - Uncomment if you are using it
320 | # tools/**
321 | # !tools/packages.config
322 |
323 | # Tabs Studio
324 | *.tss
325 |
326 | # Telerik's JustMock configuration file
327 | *.jmconfig
328 |
329 | # BizTalk build output
330 | *.btp.cs
331 | *.btm.cs
332 | *.odx.cs
333 | *.xsd.cs
334 |
335 | # OpenCover UI analysis results
336 | OpenCover/
337 |
338 | # Azure Stream Analytics local run output
339 | ASALocalRun/
340 |
341 | # MSBuild Binary and Structured Log
342 | *.binlog
343 |
344 | # NVidia Nsight GPU debugger configuration file
345 | *.nvuser
346 |
347 | # MFractors (Xamarin productivity tool) working folder
348 | .mfractor/
349 |
350 | # Local History for Visual Studio
351 | .localhistory/
352 |
353 | # BeatPulse healthcheck temp database
354 | healthchecksdb
355 |
356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
357 | MigrationBackup/
358 |
359 | # Ionide (cross platform F# VS Code tools) working folder
360 | .ionide/
361 |
362 | # Fody - auto-generated XML schema
363 | FodyWeavers.xsd
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Kyler Condran
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RSS Feed Aggregator
2 |
3 | RSS Feed Aggregator is a database-driven RSS feed aggregation application built on Azure Functions. It contains the backend features for the administration of a news aggregation website. If you want to build your own news website, this is a great place to start!
4 |
5 |
6 |
7 | ## Download
8 | Code available on GitHub [Download](https://github.com/KylerCondran/RSSFeedAggregator)
9 |
10 | ## Getting Started
11 |
12 | 1. Acquire an [Azure Functions](https://azure.microsoft.com/) instance; it is free for new signups for 30 days. If you would rather host the application yourself, you can convert it to a C# console application.
13 |
14 | 2. Acquire an MS SQL Server instance where you can host a database. Once you have a database, use the .sql files in the TableSchemas folder to create the initial tables, then run the .sql files in the StoredProcedures folder to create the stored procedures.
15 |
16 | 3. Add your database connection string in the initialization method. Also set the user agent string and paywall remover website if interested.
17 |
18 | 4. The main run method contains a [NCRONTAB Timer Trigger](https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-timer) "0 0 * * * *", this controls how often the application is executed.
19 |
20 | 5. The main run method contains time wheels, switch statements that break up the workload of downloading large volumes of RSS feeds when executed during certain hours of the day. Each readtier is a block of RSS Feeds which will be downloaded together in batches. It is a good idea to spread RSS Feeds out uniformly in these tiers for best performance.
21 |
22 | - ReadTiers 1-4: Fast - check every four hours (6 times per day)
23 | - ReadTiers 5-10: Regular - check every 6 hours (4 times per day)
24 | - ReadTiers 11-13: Slow - check every 12 hours (2 times per day)
25 | - ReadTier 14: Daily - every 24 hours (1 time per day)
26 |
27 | 6. Use the RSSFeeds_Insert.sql file in the Queries folder to add RSS feeds to the RSSFunctionApp_RSSFeeds table. Set Enabled to 1 to turn a feed on. Set the CompareAll flag if the RSS feed serves links from other providers, and set the RemovePaywall flag if the feed contains paywalled content. Set the IngestionLogging flag when first activating a feed so that you can monitor, in the RSSFunctionApp_IngestionLogs table, whether you are under-consuming or over-consuming it. Depending on how often the RSS feed posts new content, you can speed up the consumption rate by decreasing the ReadTier number or slow it down by increasing it. It is best not to over-consume a feed so that you do not risk getting blocked.
28 |
29 | 7. Trends are generated every 12 hours in the NewsFeed_Tags table: every news article title in the database is read and the 30 most frequent words are picked out. If you do not want a word to appear in the trend table, add it to the NewsFeed_BannedTrends table.
30 |
31 | 8. Article title scoring is done using lexicon based sentiment analysis when keywords are added to the TopArticles_StarbotKeywords table, there are 12 tiers a keyword can have. Tiers 1-6 are positive scoring words, tiers 7-12 are negative scoring words. The total score for the article title is calculated and added with the news article record to the database so it can be configured to appear higher or lower in a website feed.
32 |
33 | - Tier 1: 13 points
34 | - Tier 2: 11 points
35 | - Tier 3: 9 points
36 | - Tier 4: 5 points
37 | - Tier 5: 3 points
38 | - Tier 6: 1 point
39 | - Tier 7: -1 point
40 | - Tier 8: -3 points
41 | - Tier 9: -5 points
42 | - Tier 10: -9 points
43 | - Tier 11: -11 points
44 | - Tier 12: -13 points
45 |
46 | This allows you to score news articles based on what you are most interested in and what you are not interested in. The keyword list is completely subjective and is yours to design based on which news articles are most interesting to you. It is best to add several thousand words to this table so that articles are scored effectively.
47 |
48 | 9. After your scoring system is set up, bots will periodically go through and favorite the articles that have the highest scores. There are currently 4 bots. Bot one will favorite an article once an hour. Bot two will favorite an article four times a day. Bot three will favorite an article twice a day. Bot four will favorite an article once per day. The same numbered bot will never favorite the same article twice. This causes a staggering effect that can easily be overpowered by legitimate users, and it shifts interesting articles towards the top, where they may catch a user's interest.
49 |
50 | 10. Administrative tasks are performed once per day: records older than 2 weeks are deleted from the database. This retention period can be configured as desired in the RSSFunctionApp stored procedure.
51 |
52 | ## Questions?
53 |
54 | Feel free to contact me with any questions!
55 |
56 | You can reach me on [LinkedIn](https://www.linkedin.com/in/kylercondran/) or check out my [GitHub](https://github.com/KylerCondran/) for my other projects.
57 |
--------------------------------------------------------------------------------
/RSSFeedAggregator.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.6.33829.357
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RSSFeedAggregator", "RSSFeedAggregator\RSSFeedAggregator.csproj", "{3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Release|Any CPU = Release|Any CPU
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | {3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | {3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | {3BC70D9A-2D9C-4D57-8FC2-8E12F02FB240}.Release|Any CPU.Build.0 = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {EFE3767C-28C0-471A-9910-DCCA2FAF749E}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 |
4 | # Azure Functions localsettings file
5 | local.settings.json
6 |
7 | # User-specific files
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Build results
17 | [Dd]ebug/
18 | [Dd]ebugPublic/
19 | [Rr]elease/
20 | [Rr]eleases/
21 | x64/
22 | x86/
23 | bld/
24 | [Bb]in/
25 | [Oo]bj/
26 | [Ll]og/
27 |
28 | # Visual Studio 2015 cache/options directory
29 | .vs/
30 | # Uncomment if you have tasks that create the project's static files in wwwroot
31 | #wwwroot/
32 |
33 | # MSTest test Results
34 | [Tt]est[Rr]esult*/
35 | [Bb]uild[Ll]og.*
36 |
37 | # NUNIT
38 | *.VisualState.xml
39 | TestResult.xml
40 |
41 | # Build Results of an ATL Project
42 | [Dd]ebugPS/
43 | [Rr]eleasePS/
44 | dlldata.c
45 |
46 | # DNX
47 | project.lock.json
48 | project.fragment.lock.json
49 | artifacts/
50 |
51 | *_i.c
52 | *_p.c
53 | *_i.h
54 | *.ilk
55 | *.meta
56 | *.obj
57 | *.pch
58 | *.pdb
59 | *.pgc
60 | *.pgd
61 | *.rsp
62 | *.sbr
63 | *.tlb
64 | *.tli
65 | *.tlh
66 | *.tmp
67 | *.tmp_proj
68 | *.log
69 | *.vspscc
70 | *.vssscc
71 | .builds
72 | *.pidb
73 | *.svclog
74 | *.scc
75 |
76 | # Chutzpah Test files
77 | _Chutzpah*
78 |
79 | # Visual C++ cache files
80 | ipch/
81 | *.aps
82 | *.ncb
83 | *.opendb
84 | *.opensdf
85 | *.sdf
86 | *.cachefile
87 | *.VC.db
88 | *.VC.VC.opendb
89 |
90 | # Visual Studio profiler
91 | *.psess
92 | *.vsp
93 | *.vspx
94 | *.sap
95 |
96 | # TFS 2012 Local Workspace
97 | $tf/
98 |
99 | # Guidance Automation Toolkit
100 | *.gpState
101 |
102 | # ReSharper is a .NET coding add-in
103 | _ReSharper*/
104 | *.[Rr]e[Ss]harper
105 | *.DotSettings.user
106 |
107 | # JustCode is a .NET coding add-in
108 | .JustCode
109 |
110 | # TeamCity is a build add-in
111 | _TeamCity*
112 |
113 | # DotCover is a Code Coverage Tool
114 | *.dotCover
115 |
116 | # NCrunch
117 | _NCrunch_*
118 | .*crunch*.local.xml
119 | nCrunchTemp_*
120 |
121 | # MightyMoose
122 | *.mm.*
123 | AutoTest.Net/
124 |
125 | # Web workbench (sass)
126 | .sass-cache/
127 |
128 | # Installshield output folder
129 | [Ee]xpress/
130 |
131 | # DocProject is a documentation generator add-in
132 | DocProject/buildhelp/
133 | DocProject/Help/*.HxT
134 | DocProject/Help/*.HxC
135 | DocProject/Help/*.hhc
136 | DocProject/Help/*.hhk
137 | DocProject/Help/*.hhp
138 | DocProject/Help/Html2
139 | DocProject/Help/html
140 |
141 | # Click-Once directory
142 | publish/
143 |
144 | # Publish Web Output
145 | *.[Pp]ublish.xml
146 | *.azurePubxml
147 | # TODO: Comment the next line if you want to checkin your web deploy settings
148 | # but database connection strings (with potential passwords) will be unencrypted
149 | #*.pubxml
150 | *.publishproj
151 |
152 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
153 | # checkin your Azure Web App publish settings, but sensitive information contained
154 | # in these scripts will be unencrypted
155 | PublishScripts/
156 |
157 | # NuGet Packages
158 | *.nupkg
159 | # The packages folder can be ignored because of Package Restore
160 | **/packages/*
161 | # except build/, which is used as an MSBuild target.
162 | !**/packages/build/
163 | # Uncomment if necessary however generally it will be regenerated when needed
164 | #!**/packages/repositories.config
165 | # NuGet v3's project.json files produces more ignoreable files
166 | *.nuget.props
167 | *.nuget.targets
168 |
169 | # Microsoft Azure Build Output
170 | csx/
171 | *.build.csdef
172 |
173 | # Microsoft Azure Emulator
174 | ecf/
175 | rcf/
176 |
177 | # Windows Store app package directories and files
178 | AppPackages/
179 | BundleArtifacts/
180 | Package.StoreAssociation.xml
181 | _pkginfo.txt
182 |
183 | # Visual Studio cache files
184 | # files ending in .cache can be ignored
185 | *.[Cc]ache
186 | # but keep track of directories ending in .cache
187 | !*.[Cc]ache/
188 |
189 | # Others
190 | ClientBin/
191 | ~$*
192 | *~
193 | *.dbmdl
194 | *.dbproj.schemaview
195 | *.jfm
196 | *.pfx
197 | *.publishsettings
198 | node_modules/
199 | orleans.codegen.cs
200 |
201 | # Since there are multiple workflows, uncomment next line to ignore bower_components
202 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
203 | #bower_components/
204 |
205 | # RIA/Silverlight projects
206 | Generated_Code/
207 |
208 | # Backup & report files from converting an old project file
209 | # to a newer Visual Studio version. Backup files are not needed,
210 | # because we have git ;-)
211 | _UpgradeReport_Files/
212 | Backup*/
213 | UpgradeLog*.XML
214 | UpgradeLog*.htm
215 |
216 | # SQL Server files
217 | *.mdf
218 | *.ldf
219 |
220 | # Business Intelligence projects
221 | *.rdl.data
222 | *.bim.layout
223 | *.bim_*.settings
224 |
225 | # Microsoft Fakes
226 | FakesAssemblies/
227 |
228 | # GhostDoc plugin setting file
229 | *.GhostDoc.xml
230 |
231 | # Node.js Tools for Visual Studio
232 | .ntvs_analysis.dat
233 |
234 | # Visual Studio 6 build log
235 | *.plg
236 |
237 | # Visual Studio 6 workspace options file
238 | *.opt
239 |
240 | # Visual Studio LightSwitch build output
241 | **/*.HTMLClient/GeneratedArtifacts
242 | **/*.DesktopClient/GeneratedArtifacts
243 | **/*.DesktopClient/ModelManifest.xml
244 | **/*.Server/GeneratedArtifacts
245 | **/*.Server/ModelManifest.xml
246 | _Pvt_Extensions
247 |
248 | # Paket dependency manager
249 | .paket/paket.exe
250 | paket-files/
251 |
252 | # FAKE - F# Make
253 | .fake/
254 |
255 | # JetBrains Rider
256 | .idea/
257 | *.sln.iml
258 |
259 | # CodeRush
260 | .cr/
261 |
262 | # Python Tools for Visual Studio (PTVS)
263 | __pycache__/
264 | *.pyc
--------------------------------------------------------------------------------
/RSSFeedAggregator/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "profiles": {
3 | "RSSFeedAggregator": {
4 | "commandName": "Project",
5 | "commandLineArgs": "--port 7138",
6 | "launchBrowser": false
7 | }
8 | }
9 | }
--------------------------------------------------------------------------------
/RSSFeedAggregator/Properties/serviceDependencies.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "appInsights1": {
4 | "type": "appInsights"
5 | },
6 | "storage1": {
7 | "type": "storage",
8 | "connectionId": "AzureWebJobsStorage"
9 | }
10 | }
11 | }
--------------------------------------------------------------------------------
/RSSFeedAggregator/Properties/serviceDependencies.local.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "appInsights1": {
4 | "type": "appInsights.sdk"
5 | },
6 | "storage1": {
7 | "type": "storage.emulator",
8 | "connectionId": "AzureWebJobsStorage"
9 | }
10 | }
11 | }
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Articles_FindDuplicates.sql:
--------------------------------------------------------------------------------
1 | /* Ranks reporters by how many of their rows carry an article title that appears on more than one row of the table */
2 | SELECT
3 |     [Reporter],
4 |     COUNT(*) AS [total]
5 | FROM [NewsFeed_NewsArticles]
6 | WHERE [Article] IN (
7 |     /* Titles stored more than once; COUNT([Article]) ignores NULL titles, so they never qualify */
8 |     SELECT [Article]
9 |     FROM [NewsFeed_NewsArticles]
10 |     GROUP BY [Article]
11 |     HAVING COUNT([Article]) > 1
12 | )
13 | /* Column is spelled exactly as in the SELECT list so the query also runs under a case-sensitive collation */
14 | GROUP BY [Reporter]
15 | /* Secondary key makes the order of reporters with equal totals repeatable */
16 | ORDER BY [total] DESC, [Reporter] ASC;
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Articles_FindUseless.sql:
--------------------------------------------------------------------------------
1 | /* This query finds which articles from the last 12 hours do not have any scoring - used to fine tune scoring by adding keywords to the TopArticles_StarbotKeywords table */
2 | SELECT TOP (1000)
3 |     [ID],
4 |     [Reporter],
5 |     [Article]
6 | FROM [NewsFeed_NewsArticles]
7 | WHERE [StarBotScore] = 0
8 |     /* Compare the bare column with a computed cutoff rather than wrapping it in DATEDIFF: */
9 |     /* DATEDIFF(hh, ...) counts hour boundaries crossed, not elapsed time, and a function on the column prevents an index seek on [DateTime] */
10 |     AND [DateTime] > DATEADD(HOUR, -12, GETDATE())
11 | ORDER BY [ID] DESC;
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Articles_Latest.sql:
--------------------------------------------------------------------------------
1 | /* Lists the latest articles: the 1000 rows of NewsFeed_NewsArticles with the highest ID, newest first */
2 | SELECT TOP (1000)
3 |     [ID],
4 |     [Reporter],
5 |     [Article],
6 |     [URL],
7 |     [Image],
8 |     [DateTime],
9 |     [Summary],
10 |     [StarBotScore]
11 | FROM [NewsFeed_NewsArticles]
12 | ORDER BY [ID] DESC;
13 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Counter_ArticleDuplicates.sql:
--------------------------------------------------------------------------------
1 | /* Counts how many rows share each article title and lists the most duplicated titles first */
2 | SELECT
3 |     [Article],
4 |     COUNT(*) AS [counter]
5 | FROM [NewsFeed_NewsArticles]
6 | GROUP BY [Article]
7 | ORDER BY [counter] DESC;
8 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Counter_Reporter.sql:
--------------------------------------------------------------------------------
1 | /* Returns two result sets: the total article count for the table, then the article count per reporter (largest first) */
2 | SELECT COUNT(*) AS [ArticleCount]
3 | FROM [NewsFeed_NewsArticles];
4 |
5 | SELECT
6 |     [Reporter],
7 |     COUNT(*) AS [counter]
8 | FROM [NewsFeed_NewsArticles]
9 | GROUP BY [Reporter]
10 | ORDER BY [counter] DESC;
11 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Errors_Unhandled.sql:
--------------------------------------------------------------------------------
1 | /* This query finds the more serious errors - set handled flag to 1 after dealing with the error to ignore */
2 | SELECT TOP (1000)
3 |     [ID],
4 |     [Type],
5 |     [Message],
6 |     [Source],
7 |     [FeedName],
8 |     [ArticleTitle],
9 |     [Subroutine],
10 |     [DateTime],
11 |     [Handled]
12 | FROM [RSSFunctionApp_ErrorLogs]
13 | WHERE [Handled] = 0
14 |     /* Leave out the listed timeout / gateway / truncated-response messages. */
15 |     /* The IS NULL arm keeps rows without a message: NULL NOT IN (...) evaluates to UNKNOWN and would otherwise hide them. */
16 |     AND (
17 |         [Message] IS NULL
18 |         OR [Message] NOT IN (
19 |             'The operation has timed out.',
20 |             'The remote server returned an error: (503) Service Unavailable.',
21 |             'The remote server returned an error: (504) Gateway Time-out.',
22 |             'The remote server returned an error: (502) Bad Gateway.',
23 |             'The response ended prematurely.'
24 |         )
25 |     )
26 |     /* Leave out XmlException rows; again keep rows whose type is NULL */
27 |     AND ([Type] IS NULL OR [Type] NOT IN ('XmlException'))
28 | ORDER BY [DateTime] DESC;
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Ingested_Over.sql:
--------------------------------------------------------------------------------
1 | /* This query finds RSS Feeds that are being over consumed - increase the readtier number in the RSSFunctionApp_RSSFeeds table to slow down the consumption rate */
2 | /* The result column keeps the name [UploadedPercentage] for compatibility, but it holds the number of ingestion log rows per feed whose UploadedPercentage is '0' */
3 | SELECT
4 |     [FeedName],
5 |     COUNT(*) AS [UploadedPercentage]
6 | FROM [RSSFunctionApp_IngestionLogs]
7 | WHERE [UploadedPercentage] = '0'
8 | GROUP BY [FeedName]
9 | /* Sort on the aggregate itself: the bare name UploadedPercentage is both a table column and the alias above. */
10 | /* No HAVING COUNT(*) > 0 is needed - every group produced by GROUP BY contains at least one row. */
11 | ORDER BY COUNT(*) DESC;
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Ingested_Under.sql:
--------------------------------------------------------------------------------
1 | /* This query finds RSS Feeds that are being under consumed - decrease the readtier number in the RSSFunctionApp_RSSFeeds table to speed up the consumption rate */
2 | /* The result column keeps the name [UploadedPercentage] for compatibility, but it holds the number of ingestion log rows per feed whose UploadedPercentage is '100' */
3 | SELECT
4 |     [FeedName],
5 |     COUNT(*) AS [UploadedPercentage]
6 | FROM [RSSFunctionApp_IngestionLogs]
7 | WHERE [UploadedPercentage] = '100'
8 | GROUP BY [FeedName]
9 | /* Sort on the aggregate itself: the bare name UploadedPercentage is both a table column and the alias above. */
10 | /* No HAVING COUNT(*) > 0 is needed - every group produced by GROUP BY contains at least one row. */
11 | ORDER BY COUNT(*) DESC;
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/RSSFeeds_Insert.sql:
--------------------------------------------------------------------------------
/*
 * Template for adding a new RSS feed to the table.
 * Fill in FeedName, URL and Category before running; as written the row is
 * inserted disabled (Enabled = 0) on read tier 14 with ingestion logging on.
 */
INSERT INTO RSSFunctionApp_RSSFeeds
    (FeedName, URL, ReadTier, Enabled, Category, CompareAll, RemovePaywall, IngestionLogging)
VALUES
    ('', '', 14, 0, '', 0, 0, 1)
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/RSSFeeds_TurnedOff.sql:
--------------------------------------------------------------------------------
/* Lists the RSS feeds that are currently disabled (Enabled = 0), alphabetically by feed name. */
SELECT TOP (1000)
    [ID],
    [FeedName],
    [URL],
    [ReadTier],
    [Enabled],
    [CompareAll],
    [RemovePaywall],
    [IngestionLogging],
    [Category]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 0
ORDER BY FeedName ASC
12 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/RSSFeeds_TurnedOn.sql:
--------------------------------------------------------------------------------
/*
 * Lists the RSS feeds that are currently enabled together with the times of day
 * at which each read tier is downloaded. Times may have to be adjusted for the
 * timezone of the host running the function.
 *
 * The [ReadTimes] labels mirror the hourly time wheel in RSSFeedAggregator.Run:
 *   tiers 1-4   every 4 hours, tiers 5-10 every 6 hours,
 *   tiers 11-13 every 12 hours, tier 14 once a day.
 * Tier 11 is read when (hour + 4) % 12 = 0 (8AM/8PM) and tier 12 when
 * (hour + 8) % 12 = 0 (4AM/4PM); the labels for those two tiers were previously
 * swapped.
 */
SELECT TOP (1000)
    [ID],
    [FeedName],
    [URL],
    [ReadTier],
    [Enabled],
    [CompareAll],
    [RemovePaywall],
    [IngestionLogging],
    CASE
        WHEN ReadTier = 1  THEN '7PM,11PM,3AM,7AM,11AM,3PM'
        WHEN ReadTier = 2  THEN '6PM,10PM,2AM,6AM,10AM,2PM'
        WHEN ReadTier = 3  THEN '9PM,1AM,5AM,9AM,1PM,5PM'
        WHEN ReadTier = 4  THEN '8PM,12AM,4AM,8AM,12PM,4PM'
        WHEN ReadTier = 5  THEN '5PM,11PM,5AM,11AM'
        WHEN ReadTier = 6  THEN '10PM,4AM,10AM,4PM'
        WHEN ReadTier = 7  THEN '9PM,3AM,9AM,3PM'
        WHEN ReadTier = 8  THEN '8PM,2AM,8AM,2PM'
        WHEN ReadTier = 9  THEN '7PM,1AM,7AM,1PM'
        WHEN ReadTier = 10 THEN '6PM,12AM,6AM,12PM'
        WHEN ReadTier = 11 THEN '8AM,8PM'
        WHEN ReadTier = 12 THEN '4AM,4PM'
        WHEN ReadTier = 13 THEN '12AM,12PM'
        WHEN ReadTier = 14 THEN '5AM'
    END AS [ReadTimes],
    [Category]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1
ORDER BY ReadTier, FeedName ASC
28 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/ReadTier_Distribution.sql:
--------------------------------------------------------------------------------
/*
 * Shows how many enabled feeds sit on each read tier, one result set per tier.
 * The count column is named after the tier's speed class (Fast, Regular, Slow,
 * Daily). Use it to spread the feeds out uniformly within each class for best
 * performance.
 */

-- Fast: tiers 1-4
SELECT '1' AS [Tier], COUNT(*) AS [Fast]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 1;

SELECT '2' AS [Tier], COUNT(*) AS [Fast]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 2;

SELECT '3' AS [Tier], COUNT(*) AS [Fast]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 3;

SELECT '4' AS [Tier], COUNT(*) AS [Fast]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 4;

-- Regular: tiers 5-10
SELECT '5' AS [Tier], COUNT(*) AS [Regular]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 5;

SELECT '6' AS [Tier], COUNT(*) AS [Regular]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 6;

SELECT '7' AS [Tier], COUNT(*) AS [Regular]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 7;

SELECT '8' AS [Tier], COUNT(*) AS [Regular]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 8;

SELECT '9' AS [Tier], COUNT(*) AS [Regular]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 9;

SELECT '10' AS [Tier], COUNT(*) AS [Regular]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 10;

-- Slow: tiers 11-13
SELECT '11' AS [Tier], COUNT(*) AS [Slow]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 11;

SELECT '12' AS [Tier], COUNT(*) AS [Slow]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 12;

SELECT '13' AS [Tier], COUNT(*) AS [Slow]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 13;

-- Daily: tier 14
SELECT '14' AS [Tier], COUNT(*) AS [Daily]
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1 AND ReadTier = 14;
30 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Reporter_FindMissing.sql:
--------------------------------------------------------------------------------
/*
 * Lists enabled feeds that have no article at all in NewsFeed_NewsArticles.
 * Used to troubleshoot and fix feeds that are not producing anything.
 */
SELECT FeedName AS reporter
FROM [RSSFunctionApp_RSSFeeds]
WHERE Enabled = 1
EXCEPT
-- EXCEPT already compares distinct values, so no DISTINCT is needed here.
SELECT [Reporter]
FROM [NewsFeed_NewsArticles]
ORDER BY reporter ASC
7 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Reporter_LastInsert.sql:
--------------------------------------------------------------------------------
/*
 * For every reporter, shows the date of its most recent article in the table,
 * oldest first, so feeds that have gone quiet appear at the top.
 * Used to troubleshoot and fix feeds.
 */
WITH ranked_articles AS (
    -- Number each reporter's articles from newest (1) to oldest.
    SELECT
        reporter,
        datetime,
        ROW_NUMBER() OVER (PARTITION BY reporter ORDER BY datetime DESC) AS recency_rank
    FROM NewsFeed_NewsArticles
)
SELECT
    ranked_articles.reporter,
    ranked_articles.datetime
FROM ranked_articles
WHERE ranked_articles.recency_rank = 1
ORDER BY DateTime ASC
10 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/Reporter_UselessArticleCount.sql:
--------------------------------------------------------------------------------
/*
 * Counts, per reporter, the articles whose StarBotScore is 0 (unscored),
 * largest count first. Used to fine-tune the keyword scoring held in the
 * TopArticles_StarbotKeywords table.
 */
SELECT
    [Reporter],
    COUNT(*) AS [counter]
FROM [NewsFeed_NewsArticles]
WHERE StarBotScore = 0
GROUP BY [Reporter]
ORDER BY [counter] DESC
7 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/TopArticles_HighestScore.sql:
--------------------------------------------------------------------------------
/*
 * Highest-scoring articles per time frame. Each statement returns its own
 * result set. The hour offsets are the same windows used by the "Top Articles"
 * branches of the NewsFeed stored procedure.
 * NOTE(review): the first window extends 7 hours into the future, presumably to
 * absorb a timezone offset between GETDATE() and the stored dates - confirm.
 */

-- Window: 5 hours ago .. 7 hours ahead
SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
FROM [NewsFeed_NewsArticles]
WHERE DateTime BETWEEN DATEADD(hour, -5, GETDATE()) AND DATEADD(hour, 7, GETDATE())
ORDER BY StarBotScore DESC;

-- Window: 17 .. 5 hours ago
SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
FROM [NewsFeed_NewsArticles]
WHERE DateTime BETWEEN DATEADD(hour, -17, GETDATE()) AND DATEADD(hour, -5, GETDATE())
ORDER BY StarBotScore DESC;

-- Window: 65 .. 17 hours ago
SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
FROM [NewsFeed_NewsArticles]
WHERE DateTime BETWEEN DATEADD(hour, -65, GETDATE()) AND DATEADD(hour, -17, GETDATE())
ORDER BY StarBotScore DESC;

-- Window: 161 .. 65 hours ago
SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
FROM [NewsFeed_NewsArticles]
WHERE DateTime BETWEEN DATEADD(hour, -161, GETDATE()) AND DATEADD(hour, -65, GETDATE())
ORDER BY StarBotScore DESC;

-- Window: 329 .. 161 hours ago
SELECT TOP (5) [Reporter], [Article], [URL], [StarBotScore]
FROM [NewsFeed_NewsArticles]
WHERE DateTime BETWEEN DATEADD(hour, -329, GETDATE()) AND DATEADD(hour, -161, GETDATE())
ORDER BY StarBotScore DESC;

-- Top 50 of the last 24 hours
SELECT TOP (50) [Reporter], [Article], [URL], [DateTime], [StarBotScore]
FROM [NewsFeed_NewsArticles]
WHERE DateTime BETWEEN DATEADD(hour, -24, GETDATE()) AND GETDATE()
ORDER BY StarBotScore DESC;

/*
 * Highest-scoring articles in the whole table with an age-based decay, so that
 * recent articles take precedence. The penalty grows in 6-hour steps; the WHEN
 * branches are tested from oldest to newest, so the first match is the largest
 * applicable penalty. ORDER BY StarBotScore refers to the select-list alias,
 * i.e. the decayed score.
 */
SELECT TOP (50)
    [Article],
    [URL],
    [Reporter],
    [DateTime],
    CASE
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 102 THEN [StarBotScore] - 50
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 96  THEN [StarBotScore] - 40
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 90  THEN [StarBotScore] - 35
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 84  THEN [StarBotScore] - 30
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 78  THEN [StarBotScore] - 25
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 72  THEN [StarBotScore] - 20
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 66  THEN [StarBotScore] - 18
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 60  THEN [StarBotScore] - 16
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 54  THEN [StarBotScore] - 14
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 48  THEN [StarBotScore] - 12
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 42  THEN [StarBotScore] - 10
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 36  THEN [StarBotScore] - 8
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 30  THEN [StarBotScore] - 6
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 24  THEN [StarBotScore] - 4
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 18  THEN [StarBotScore] - 3
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 12  THEN [StarBotScore] - 2
        WHEN DATEDIFF(hh, DateTime, GETDATE()) >= 6   THEN [StarBotScore] - 1
        ELSE [StarBotScore]
    END AS [StarBotScore]
FROM [NewsFeed_NewsArticles]
ORDER BY StarBotScore DESC, DateTime DESC;
36 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/Queries/TopArticles_MainPageScoring.sql:
--------------------------------------------------------------------------------
/*
 * Lists up to 50 articles whose StarBotScore is above the main-page threshold
 * (28), highest score first.
 */
SELECT TOP (50)
    [ID],
    [Reporter],
    [Article],
    [URL],
    [StarBotScore]
FROM [NewsFeed_NewsArticles]
WHERE StarBotScore > 28
ORDER BY StarBotScore DESC
4 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/RSSFeedAggregator.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using Microsoft.Azure.WebJobs;
3 | using System.Data.SqlClient;
4 | using System.Net;
5 | using System.ServiceModel.Syndication;
6 | using System.Xml;
7 | using System.Text.RegularExpressions;
8 | using System.Data;
9 | using System.Collections.Generic;
10 | using System.Linq;
11 |
12 | namespace RSSFeedAggregator
13 | {
14 | public class RSSFeedAggregator
15 | {
16 | string _connectionString;
17 | string _userAgent;
18 | string _paywallRemover;
19 | int _connectionTimeout;
20 | int _ArticleKeepAge;
21 | bool _scoreArticles;
22 | bool _StarBotsActive;
23 | Dictionary _ArticleScoreKeywords;
24 |
25 | #region "Initialization"
/// <summary>
/// Loads the hard-coded configuration into the instance fields and then reads
/// the keyword scoring table from the database.
/// </summary>
public void Initialize()
{
    // Database and HTTP settings.
    _connectionString = "ConnectionStringGoesHere";
    _connectionTimeout = 5000; // milliseconds (5 second timeout)
    _userAgent = "UserAgentGoesHere";
    _paywallRemover = "https://PaywallRemoverWebsite/proxy?q=";

    // Article retention: fetch articles with an age of 2 weeks maximum (days, negative).
    _ArticleKeepAge = -14;

    // Feature switches.
    // Set to true to score each article based on keywords entered in the TopArticles_StarbotKeywords table.
    _scoreArticles = false;
    // Set to true to have bots periodically star (favorite) the top scoring articles;
    // requires the TopArticles_StarbotKeywords table to be set up.
    _StarBotsActive = false;

    // Must run after _connectionString is assigned: it opens a database connection.
    _ArticleScoreKeywords = InitKeywordTable();
}
37 | #endregion
38 |
39 | #region "Main"
/// <summary>
/// Timer entry point. The NCRONTAB expression "0 0 * * * *" fires at the top of
/// every hour; the current hour of the host clock (DateTime.Now) selects which
/// read tiers and maintenance jobs run on this invocation.
/// </summary>
/// <param name="myTimer">Timer information supplied by the Functions runtime (unused).</param>
[FunctionName("RSSFeedAggregator")]
public void Run([TimerTrigger("0 0 * * * *")] TimerInfo myTimer)
{
    try
    {
        Initialize();

        // Capture the hour once. Reading DateTime.Now again for every wheel (and
        // after long-running feed reads) could evaluate the wheels against
        // different hours within a single invocation.
        int hour = DateTime.Now.Hour;

        // Fast - every four hours time wheel (6 times per day).
        // hour % 4: 3 -> tier 1, 2 -> tier 2, 1 -> tier 3, 0 -> tier 4.
        ReadRssLists(4 - (hour % 4));
        StarBot(1, 1, -5, 7);

        // Regular - every 6 hours time wheel (4 times per day).
        // hour % 6: 5 -> tier 5, 4 -> tier 6, 3 -> tier 7, 2 -> tier 8, 1 -> tier 9, 0 -> tier 10.
        ReadRssLists(10 - (hour % 6));

        // Slow - every 12 hours time wheel (2 times per day).
        switch (hour % 12)
        {
            case 8: // (hour + 4) % 12 == 0
                ReadRssLists(11);
                StarBot(2, 1, -17, 7);
                break;
            case 4: // (hour + 8) % 12 == 0
                ReadRssLists(12);
                StarBot(3, 1, -161, 7);
                break;
            case 0:
                ReadRssLists(13);
                StarBot(2, 1, -17, 7);
                DeleteOldTags();
                GenerateTags();
                break;
        }

        // Daily - every 24 hours (1 time per day).
        switch (hour)
        {
            case 1:
                DeleteOlderThan2Weeks();
                break;
            case 5:
                ReadRssLists(14);
                StarBot(4, 1, -329, 7);
                break;
        }
    }
    catch (Exception e)
    {
        TryToDumpErrorToDB(e, "None", "None", "Main Method");
    }
}
123 | #endregion
124 |
125 | #region "RSS Reading Subs"
/// <summary>
/// Loads the feeds assigned to a read tier (stored procedure RSSFunctionApp,
/// @Switch = 2) and ingests each of them with <see cref="ReadTheFeed"/>.
/// </summary>
/// <param name="readtier">Read tier whose feeds should be downloaded.</param>
/// <remarks>
/// The feed list is buffered first so the SQL connection and data reader are
/// released before any HTTP request starts, instead of staying open for the
/// whole duration of every download. Errors are logged and never propagate;
/// a failure on one row or one feed does not stop the others.
/// </remarks>
public void ReadRssLists(int readtier)
{
    List<(string Name, string Url, bool CompareAll, bool RemovePaywall, bool IngestionLogging)> feeds = new();

    try
    {
        using SqlConnection conn = new(_connectionString);
        conn.Open();
        using SqlCommand comm = new("RSSFunctionApp", conn) { CommandType = CommandType.StoredProcedure };
        comm.Parameters.AddWithValue("@Switch", 2);
        comm.Parameters.AddWithValue("@ReadTier", readtier);
        using SqlDataReader rdr = comm.ExecuteReader();
        while (rdr.Read())
        {
            try
            {
                // Column order: feed name, url, compare-all, remove-paywall, ingestion-logging.
                feeds.Add((rdr.GetString(0), rdr.GetString(1), rdr.GetBoolean(2), rdr.GetBoolean(3), rdr.GetBoolean(4)));
            }
            catch (Exception e)
            {
                // A malformed row (e.g. NULL column) is logged and skipped.
                TryToDumpErrorToDB(e, "None", "None", "ReadRSSLists");
            }
        }
    }
    catch (Exception e)
    {
        TryToDumpErrorToDB(e, "None", "None", "ReadRSSLists");
    }

    // Feeds read before a mid-stream failure are still processed.
    foreach (var feed in feeds)
    {
        try
        {
            ReadTheFeed(feed.Name, feed.Url, feed.CompareAll, feed.RemovePaywall, feed.IngestionLogging);
        }
        catch (Exception e)
        {
            TryToDumpErrorToDB(e, feed.Name, "None", "ReadRSSLists");
        }
    }
}
162 |
/// <summary>
/// Downloads one RSS/Atom feed and inserts every item that is new, inside the
/// keep range and has a usable link into NewsFeed_NewsArticles.
/// </summary>
/// <param name="feedname">Feed name; stored as the article's Reporter.</param>
/// <param name="feedurl">URL of the feed document.</param>
/// <param name="compareall">True to de-duplicate against every stored article, false to compare only against this feed's articles.</param>
/// <param name="removepaywall">True to prefix each article link with the configured paywall remover URL.</param>
/// <param name="ingestionlogging">True to write an ingestion-log row when at least one item was uploaded or discarded.</param>
/// <remarks>
/// Download, parse and per-item errors are logged via TryToDumpErrorToDB and do
/// not propagate. Creating the request happens outside the try block, so an
/// invalid <paramref name="feedurl"/> throws to the caller.
/// </remarks>
public void ReadTheFeed(string feedname, string feedurl, bool compareall, bool removepaywall, bool ingestionlogging)
{
    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(feedurl);
    myReq.UserAgent = _userAgent;
    myReq.Timeout = _connectionTimeout;
    try
    {
        using HttpWebResponse response = (HttpWebResponse)myReq.GetResponse();
        using XmlReader reader = XmlReader.Create(response.GetResponseStream());
        SyndicationFeed feed = SyndicationFeed.Load(reader);
        if (feed == null) return;
        try
        {
            // Known links are stored lower-cased, but the paywall prefix added
            // below may contain upper-case characters. Comparing case-insensitively
            // keeps such links from being re-inserted on every run, and the set
            // makes each lookup O(1) instead of a linear scan.
            HashSet<string> knownLinks = new(InitLinkList(feedname, compareall), StringComparer.OrdinalIgnoreCase);
            string[] imageExtensions = { ".png", ".jpg", ".jpeg", ".gif" };
            int uploadedArticle = 0;
            int discardedArticle = 0;
            foreach (SyndicationItem i in feed.Items)
            {
                try
                {
                    string linkuri = "";
                    string imageuri = "";
                    DateTime pubDateTime;
                    try
                    {
                        pubDateTime = i.PublishDate.ToUniversalTime().DateTime;
                    }
                    catch (Exception)
                    {
                        discardedArticle++;
                        continue;
                    }

                    switch (i.Links.Count)
                    {
                        case 1:
                            linkuri = i.Links[0].Uri.ToString().ToLower();
                            break;
                        case > 1:
                            foreach (SyndicationLink x in i.Links)
                            {
                                string thislink = x.Uri.ToString();
                                switch (x.RelationshipType)
                                {
                                    case null:
                                        // Links without a relationship type are ignored.
                                        continue;
                                    case "enclosure" when x.MediaType != null:
                                        // First image enclosure with a recognised extension becomes the article image.
                                        if (imageuri == ""
                                            && x.MediaType.Contains("image")
                                            && imageExtensions.Any(ext => thislink.ToLower().Contains(ext)))
                                        {
                                            imageuri = thislink;
                                        }
                                        break;
                                    default:
                                        // First remaining link becomes the article link.
                                        if (linkuri == "")
                                        {
                                            linkuri = thislink.ToLower();
                                        }
                                        break;
                                }
                            }
                            break;
                    }

                    if (linkuri == "")
                    {
                        discardedArticle++;
                        continue;
                    }
                    if (removepaywall) linkuri = _paywallRemover + linkuri;

                    // NOTE(review): already-known links and out-of-range items are skipped
                    // without incrementing discardedArticle, so they do not influence the
                    // logged upload percentage - confirm this is intended.
                    if (knownLinks.Contains(linkuri)) continue;
                    if (!IsValidDateKeepRange(pubDateTime) || linkuri.Length >= 451) continue;

                    // Items without a title element have a null Title; treat that as an empty title.
                    string titletext = i.Title?.Text ?? "";
                    if (titletext.Length > 250) titletext = titletext[..250];

                    string summarytext = "";
                    if (i.Summary?.Text != null) summarytext = SummaryClipping(i.Summary.Text);

                    int articlescore = 0;
                    if (_scoreArticles) articlescore = ArticleScoring(titletext.ToLower());

                    TryToDumpStoryToDB(feedname, titletext, summarytext, linkuri, pubDateTime, articlescore, imageuri);
                    knownLinks.Add(linkuri);
                    uploadedArticle++;
                }
                catch (Exception e)
                {
                    TryToDumpErrorToDB(e, feedname, "None", "ReadTheFeed3");
                }
            }
            if (ingestionlogging && !(uploadedArticle == 0 && discardedArticle == 0)) LogIngestedPercentage(feedname, uploadedArticle, discardedArticle);
        }
        catch (Exception e)
        {
            TryToDumpErrorToDB(e, feedname, "None", "ReadTheFeed2");
        }
    }
    catch (Exception e)
    {
        TryToDumpErrorToDB(e, feedname, "None", "ReadTheFeed1");
    }
}
274 |
/// <summary>
/// Tells whether a publication date lies inside the keep window, i.e. between
/// "now plus _ArticleKeepAge days" and now (both inclusive), evaluated in UTC.
/// </summary>
/// <param name="dateTime">Date to test; null is never valid.</param>
/// <returns>True when the date is inside the window, otherwise false.</returns>
public bool IsValidDateKeepRange(DateTime? dateTime)
{
    if (!dateTime.HasValue)
    {
        return false;
    }

    DateTime candidate = dateTime.Value;
    DateTime oldestAllowed = DateTime.UtcNow.AddDays(_ArticleKeepAge);
    DateTime newestAllowed = DateTime.UtcNow;

    return candidate >= oldestAllowed && candidate <= newestAllowed;
}
284 |
/// <summary>
/// Turns a feed summary into plain text: removes anything that looks like an
/// HTML/XML tag, collapses every run of whitespace into a single space and
/// truncates the result to 2000 characters.
/// </summary>
/// <param name="summary">Raw summary text; may be null.</param>
/// <returns>The cleaned summary, or an empty string when <paramref name="summary"/> is null or empty.</returns>
public static string SummaryClipping(string summary)
{
    // Regex.Replace throws ArgumentNullException on null input; a feed item
    // with an empty summary element must not abort the article.
    if (string.IsNullOrEmpty(summary)) return string.Empty;

    summary = Regex.Replace(summary, "<[^>]*>", string.Empty);
    summary = Regex.Replace(summary, @"\s+", " ");
    if (summary.Length > 2000) summary = summary[..2000];
    return summary;
}
292 |
/// <summary>
/// Scores an article title by summing the points of every scoring keyword it
/// contains. Characters other than a-z and space are removed first and the
/// title is split on single spaces.
/// </summary>
/// <param name="articleTitle">Article title; the caller passes it lower-cased.</param>
/// <returns>The total score; may be negative.</returns>
/// <remarks>
/// Keyword tiers 1-6 add 13, 11, 9, 5, 3 and 1 points; tiers 7-12 subtract
/// 1, 3, 5, 9, 11 and 13 points. Tier 4 counts at most 4 times and tier 6 at
/// most 8 times per title. Any other tier contributes nothing.
/// </remarks>
public int ArticleScoring(string articleTitle)
{
    int total = 0;
    int tierFourLeft = 4;
    int tierSixLeft = 8;

    string lettersOnly = Regex.Replace(articleTitle, "[^a-z ]", string.Empty);
    foreach (string token in lettersOnly.Split(" "))
    {
        if (!_ArticleScoreKeywords.TryGetValue(token, out int tier)) continue;

        // Capped tiers: stop contributing once their allowance is used up.
        if (tier == 4)
        {
            if (tierFourLeft <= 0) continue;
            tierFourLeft--;
        }
        else if (tier == 6)
        {
            if (tierSixLeft <= 0) continue;
            tierSixLeft--;
        }

        total += tier switch
        {
            1 => 13,
            2 => 11,
            3 => 9,
            4 => 5,
            5 => 3,
            6 => 1,
            7 => -1,
            8 => -3,
            9 => -5,
            10 => -9,
            11 => -11,
            12 => -13,
            _ => 0,
        };
    }
    return total;
}
355 |
/// <summary>
/// Reads the URLs of already stored articles, lower-cased, newest first. They
/// are used to detect duplicates while ingesting a feed.
/// </summary>
/// <param name="feedname">Feed name; used as the Reporter filter when <paramref name="compareall"/> is false.</param>
/// <param name="compareall">True to return the URLs of every article, false to return only this feed's URLs.</param>
/// <returns>The lower-cased URLs.</returns>
public List<string> InitLinkList(string feedname, bool compareall)
{
    string urlselectquery = compareall
        ? "SELECT URL FROM NewsFeed_NewsArticles ORDER BY DateTime DESC"
        : "SELECT URL FROM NewsFeed_NewsArticles WHERE Reporter = @Reporter ORDER BY DateTime DESC";

    List<string> storedUrls = new();
    using (SqlConnection conn = new(_connectionString))
    {
        conn.Open();
        using SqlCommand comm = new(urlselectquery, conn);
        if (!compareall)
        {
            comm.Parameters.AddWithValue("@Reporter", feedname);
        }
        using SqlDataReader rdr = comm.ExecuteReader();
        while (rdr.Read())
        {
            storedUrls.Add(rdr.GetString(0).ToLower());
        }
    }
    return storedUrls;
}
374 |
/// <summary>
/// Loads the keyword scoring table (stored procedure RSSFunctionApp,
/// @Switch = 1) into a dictionary of lower-cased keyword to scoring tier.
/// </summary>
/// <returns>Keyword-to-tier lookup used by <see cref="ArticleScoring"/>.</returns>
/// <remarks>
/// Database errors propagate to the caller. When two rows yield the same
/// keyword after lower-casing, the later row wins.
/// </remarks>
public Dictionary<string, int> InitKeywordTable()
{
    Dictionary<string, int> keywordTable = new();
    using SqlConnection conn = new(_connectionString);
    conn.Open();
    using SqlCommand comm = new();
    comm.Connection = conn;
    comm.CommandType = CommandType.StoredProcedure;
    comm.CommandText = "RSSFunctionApp";
    comm.Parameters.AddWithValue("@Switch", 1);
    using SqlDataReader rdr = comm.ExecuteReader();
    while (rdr.Read())
    {
        // Indexer assignment instead of Add: keywords that differ only by case
        // collide after ToLower(), and Add would throw ArgumentException, which
        // aborts Initialize() and with it the entire timer run.
        keywordTable[rdr.GetString(0).ToLower()] = rdr.GetInt32(1);
    }
    return keywordTable;
}
393 | #endregion
394 |
395 | #region "Database Inserts"
/// <summary>
/// Inserts one article into NewsFeed_NewsArticles. A failure while opening the
/// connection or executing the insert is logged through TryToDumpErrorToDB and
/// not rethrown.
/// </summary>
/// <param name="feedName">Stored as Reporter.</param>
/// <param name="title">Stored as Article.</param>
/// <param name="summary">Stored as Summary.</param>
/// <param name="link">Stored as URL.</param>
/// <param name="pubDate">Stored as DateTime.</param>
/// <param name="articleScore">Stored as StarBotScore.</param>
/// <param name="image">Stored as Image.</param>
public void TryToDumpStoryToDB(string feedName, string title, string summary, string link, DateTime pubDate, int articleScore, string image)
{
    const string insertSql = "INSERT INTO NewsFeed_NewsArticles (Reporter, Article, URL, DateTime, Summary, StarBotScore, Image) VALUES (@Reporter, @Article, @URL, @DateTime, @Summary, @StarBotScore, @Image)";

    using SqlConnection conn = new(_connectionString);
    using SqlCommand comm = new(insertSql, conn) { CommandType = CommandType.Text };

    SqlParameterCollection args = comm.Parameters;
    args.AddWithValue("@Reporter", feedName);
    args.AddWithValue("@Article", title);
    args.AddWithValue("@Summary", summary);
    args.AddWithValue("@URL", link);
    args.AddWithValue("@DateTime", pubDate);
    args.AddWithValue("@StarBotScore", articleScore);
    args.AddWithValue("@Image", image);

    try
    {
        conn.Open();
        comm.ExecuteNonQuery();
    }
    catch (Exception insertError)
    {
        TryToDumpErrorToDB(insertError, feedName, title, "TryToDumpStoryToDB");
    }
}
418 |
/// <summary>
/// Best-effort error logger: writes one row to RSSFunctionApp_ErrorLogs with
/// Handled = 0 and the database's current date. Never throws.
/// </summary>
/// <param name="eMessage">Exception to record (type name, message and stack trace).</param>
/// <param name="feedName">Feed being processed, or a placeholder such as "None".</param>
/// <param name="articleTitle">Article being processed, or a placeholder; clipped to 250 characters.</param>
/// <param name="subroutine">Name of the routine that caught the exception.</param>
/// <remarks>
/// Null inputs are stored as empty strings. AddWithValue treats a null value as
/// "parameter not supplied", which would make the insert fail and silently
/// discard the error being logged (StackTrace, for example, is null for an
/// exception that was never thrown).
/// </remarks>
public void TryToDumpErrorToDB(Exception eMessage, string feedName, string articleTitle, string subroutine)
{
    try
    {
        // Null-safe truncation to the length limits used for each value.
        static string Clip(string value, int maxLength)
        {
            value ??= string.Empty;
            return value.Length > maxLength ? value[..maxLength] : value;
        }

        string typetext = Clip(eMessage?.GetType().Name, 2000);
        string errormessagetext = Clip(eMessage?.Message, 2000);
        string sourcetext = Clip(eMessage?.StackTrace, 5000);
        string articletext = Clip(articleTitle, 250);

        using SqlConnection conn = new(_connectionString);
        using SqlCommand comm = new("INSERT INTO RSSFunctionApp_ErrorLogs (Type, Message, Source, FeedName, ArticleTitle, Subroutine, DateTime, Handled) VALUES (@Type, @Message, @Source, @FeedName, @ArticleTitle, @Subroutine, GETDATE(), 0)", conn);
        comm.CommandType = CommandType.Text;
        comm.Parameters.AddWithValue("@Type", typetext);
        comm.Parameters.AddWithValue("@Message", errormessagetext);
        comm.Parameters.AddWithValue("@Source", sourcetext);
        comm.Parameters.AddWithValue("@FeedName", feedName ?? string.Empty);
        comm.Parameters.AddWithValue("@ArticleTitle", articletext);
        comm.Parameters.AddWithValue("@Subroutine", subroutine ?? string.Empty);
        conn.Open();
        comm.ExecuteNonQuery();
    }
    catch (Exception loggingError)
    {
        // Error writing to the database - write both errors locally so they are
        // not lost. Still best-effort: nothing is rethrown.
        try
        {
            Console.Error.WriteLine("RSSFeedAggregator: could not log error to database: " + loggingError.Message);
            Console.Error.WriteLine("RSSFeedAggregator: original error in " + subroutine + ": " + eMessage);
        }
        catch (Exception)
        {
            // Nothing left to report to.
        }
    }
}
448 |
/// <summary>
/// Writes one row to RSSFunctionApp_IngestionLogs with the uploaded, discarded
/// and total item counts of a feed read and the uploaded share in percent.
/// A database failure is logged through TryToDumpErrorToDB and not rethrown.
/// </summary>
/// <param name="feedName">Feed the counts belong to.</param>
/// <param name="uploadedCount">Number of items inserted.</param>
/// <param name="discardedCount">Number of items discarded.</param>
public void LogIngestedPercentage(string feedName, int uploadedCount, int discardedCount)
{
    int totalCount = uploadedCount + discardedCount;
    float uploadedPercentage = (float)uploadedCount / (float)totalCount * 100;

    const string insertSql = "INSERT INTO RSSFunctionApp_IngestionLogs (FeedName, UploadedCount, DiscardedCount, TotalCount, UploadedPercentage, DateTime) VALUES (@FeedName, @UploadedCount, @DiscardedCount, @TotalCount, @UploadedPercentage, GETDATE())";

    using SqlConnection conn = new(_connectionString);
    using SqlCommand comm = new(insertSql, conn) { CommandType = CommandType.Text };

    SqlParameterCollection args = comm.Parameters;
    args.AddWithValue("@FeedName", feedName);
    args.AddWithValue("@UploadedCount", uploadedCount);
    args.AddWithValue("@DiscardedCount", discardedCount);
    args.AddWithValue("@TotalCount", totalCount);
    args.AddWithValue("@UploadedPercentage", uploadedPercentage);

    try
    {
        conn.Open();
        comm.ExecuteNonQuery();
    }
    catch (Exception insertError)
    {
        TryToDumpErrorToDB(insertError, feedName, "", "LogIngestedPercentage");
    }
}
471 |
/// <summary>
/// Builds the trending-tag list: counts words taken from the article texts
/// returned by stored procedure RSSFunctionApp (@Switch = 4), ignoring the
/// banned trends (@Switch = 3), and inserts the most frequent ones (at most
/// 30) into NewsFeed_Tags.
/// </summary>
/// <remarks>
/// Errors are logged through TryToDumpErrorToDB and not rethrown. When fewer
/// than 30 distinct tags exist, only those are inserted; when none exist,
/// nothing is inserted.
/// </remarks>
public void GenerateTags()
{
    const int maxTags = 30;
    try
    {
        Dictionary<string, int> tagTable = new();
        HashSet<string> bannedTrends = new();

        // Read Banned Trends
        using SqlConnection conn1 = new(_connectionString);
        conn1.Open();
        using SqlCommand comm1 = new();
        comm1.Connection = conn1;
        comm1.CommandType = CommandType.StoredProcedure;
        comm1.CommandText = "RSSFunctionApp";
        comm1.Parameters.AddWithValue("@Switch", 3);
        using SqlDataReader rdr1 = comm1.ExecuteReader();
        while (rdr1.Read())
        {
            bannedTrends.Add(rdr1.GetString(0).ToLower());
        }
        conn1.Close();

        // Read Articles
        using SqlConnection conn2 = new(_connectionString);
        conn2.Open();
        using SqlCommand comm2 = new();
        comm2.Connection = conn2;
        comm2.CommandType = CommandType.StoredProcedure;
        comm2.CommandText = "RSSFunctionApp";
        comm2.Parameters.AddWithValue("@Switch", 4);
        using SqlDataReader rdr2 = comm2.ExecuteReader();
        while (rdr2.Read())
        {
            string rawstring = rdr2.GetString(0).ToLower();
            rawstring = Regex.Replace(rawstring, "[^a-z. ]", string.Empty);
            foreach (string splitstring in rawstring.Split(" "))
            {
                // Empty tokens come from leading or repeated spaces left behind
                // by the character filter; they are not words and must not
                // become a tag.
                if (splitstring.Length == 0 || bannedTrends.Contains(splitstring)) continue;

                tagTable.TryGetValue(splitstring, out int seen);
                tagTable[splitstring] = seen + 1;

                // NOTE(review): only the first non-banned word of each text is
                // counted because of this break - confirm this is intended.
                break;
            }
        }
        conn2.Close();

        // Sort once and keep at most maxTags entries. Indexing a fixed 30
        // positions threw when fewer distinct tags existed and re-ran the sort
        // for every index.
        List<string> topTags = tagTable
            .OrderByDescending(entry => entry.Value)
            .Select(entry => entry.Key)
            .Take(maxTags)
            .ToList();
        if (topTags.Count == 0) return;

        // Only parameter names are concatenated into the statement; tag values
        // are always bound as parameters.
        string valuesClause = string.Join(",", Enumerable.Range(0, topTags.Count).Select(n => "(@Tag" + n + ")"));

        using SqlConnection conn3 = new(_connectionString);
        using SqlCommand comm3 = new("INSERT INTO NewsFeed_Tags (Tag) VALUES " + valuesClause, conn3);
        comm3.CommandType = CommandType.Text;
        for (int i = 0; i < topTags.Count; i++)
        {
            comm3.Parameters.AddWithValue("@Tag" + i, topTags[i]);
        }
        conn3.Open();
        comm3.ExecuteNonQuery();
    }
    catch (Exception e)
    {
        TryToDumpErrorToDB(e, "None", "None", "GenerateTags");
    }
}
541 |
/// <summary>
/// Lets a bot star articles: asks stored procedure RSSFunctionApp
/// (@Switch = 7) for article ids and calls NewsFeed_AddStarredArticle for each
/// id, using "StarBot" + botNumber as the IP address. Does nothing unless
/// _StarBotsActive is set. Errors are logged and not rethrown.
/// </summary>
/// <param name="botNumber">Number appended to "StarBot" to form the bot's identity.</param>
/// <param name="starCount">Passed to the procedure as @TopCount.</param>
/// <param name="fromRange">Passed to the procedure as @FromRange.</param>
/// <param name="toRange">Passed to the procedure as @ToRange.</param>
public void StarBot(int botNumber, int starCount, int fromRange, int toRange)
{
    if (!_StarBotsActive) return;

    try
    {
        using SqlConnection readConn = new(_connectionString);
        readConn.Open();
        using SqlCommand pickArticles = new("RSSFunctionApp", readConn) { CommandType = CommandType.StoredProcedure };
        pickArticles.Parameters.AddWithValue("@Switch", 7);
        pickArticles.Parameters.AddWithValue("@FromRange", fromRange);
        pickArticles.Parameters.AddWithValue("@ToRange", toRange);
        pickArticles.Parameters.AddWithValue("@BotNumber", "StarBot" + botNumber);
        pickArticles.Parameters.AddWithValue("@TopCount", starCount);

        using SqlDataReader picked = pickArticles.ExecuteReader();
        while (picked.Read())
        {
            // A separate connection per star: the reader keeps readConn busy.
            using SqlConnection writeConn = new(_connectionString);
            using SqlCommand addStar = new("NewsFeed_AddStarredArticle", writeConn) { CommandType = CommandType.StoredProcedure };
            addStar.Parameters.AddWithValue("@IPAddress", "StarBot" + botNumber);
            addStar.Parameters.AddWithValue("@NewsArticleID", picked.GetInt32(0));
            writeConn.Open();
            addStar.ExecuteNonQuery();
        }
        readConn.Close();
    }
    catch (Exception starError)
    {
        TryToDumpErrorToDB(starError, "None", "None", "StarBot");
    }
}
578 | #endregion
579 |
580 | #region "Database Deletes"
/// <summary>
/// Runs stored procedure RSSFunctionApp with @Switch = 5, the daily clean-up of
/// old data. Errors are logged through TryToDumpErrorToDB and not rethrown.
/// </summary>
public void DeleteOlderThan2Weeks()
{
    try
    {
        using SqlConnection conn = new(_connectionString);
        using SqlCommand cleanup = new("RSSFunctionApp", conn) { CommandType = CommandType.StoredProcedure };
        cleanup.Parameters.AddWithValue("@Switch", 5);
        conn.Open();
        cleanup.ExecuteNonQuery();
    }
    catch (Exception cleanupError)
    {
        TryToDumpErrorToDB(cleanupError, "", "", "DeleteOlderThan2Weeks");
    }
}
599 |
/// <summary>
/// Runs stored procedure RSSFunctionApp with @Switch = 6 to remove the existing
/// tags; Run calls it right before GenerateTags. Errors are logged through
/// TryToDumpErrorToDB and not rethrown.
/// </summary>
public void DeleteOldTags()
{
    try
    {
        using SqlConnection conn = new(_connectionString);
        using SqlCommand purge = new("RSSFunctionApp", conn) { CommandType = CommandType.StoredProcedure };
        purge.Parameters.AddWithValue("@Switch", 6);
        conn.Open();
        purge.ExecuteNonQuery();
    }
    catch (Exception purgeError)
    {
        TryToDumpErrorToDB(purgeError, "", "", "DeleteOldTags");
    }
}
618 | #endregion
619 | }
620 | }
--------------------------------------------------------------------------------
/RSSFeedAggregator/RSSFeedAggregator.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | net6.0
4 | v4
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | PreserveNewest
14 |
15 |
16 | PreserveNewest
17 | Never
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 | SET QUOTED_IDENTIFIER ON
7 | GO
8 | /*
9 |     NewsFeed: read-only lookups for the news feed, selected by @Switch.
10 |
11 |     Parameters:
12 |         @Switch  Which result set to return (list below). Any other value,
13 |                  including NULL, returns no result set.
14 |         @UserIP  Caller's IP address. Only read by switches 7, 8, 9 and 10.
15 |
16 |     Result sets:
17 |         1-5  Most-starred articles inside one publish-time window:
18 |              ID, StarCount, Reporter, Article, Summary, URL, DateTime.
19 |         6    Every tag, in ascending order.
20 |         7    [Count] of searches logged for @UserIP (60-minute limit check).
21 |         8    [Count] of stars given by @UserIP (24-hour limit check).
22 |         9    Articles starred by @UserIP, newest first.
23 |         10   ID of each star given by @UserIP (same 24-hour test as 8).
24 |
25 |     NOTE(review): the Top Articles windows are anchored at GETDATE() + 7 hours
26 |     rather than GETDATE(). Presumably this compensates for a fixed offset
27 |     between the server clock and the stored article timestamps - confirm
28 |     before changing any bound.
29 | */
30 | CREATE PROCEDURE [NewsFeed]
31 |     @Switch INT = NULL,
32 |     @UserIP NVARCHAR(25) = NULL
33 | AS
34 | BEGIN
35 |     SET NOCOUNT ON
36 |
37 |     /* Top Articles - 12H (1), 24H (2), 3D (3), 7D (4), 14D (5) */
38 |     IF @Switch IN (1, 2, 3, 4, 5)
39 |     BEGIN
40 |         DECLARE @TopCount INT, @FromHours INT, @ToHours INT
41 |
42 |         -- The five variants differ only in row limit and window bounds
43 |         -- (hours relative to GETDATE()); each window starts where the
44 |         -- previous, newer one ends.
45 |         SELECT
46 |             @TopCount = w.TopCount,
47 |             @FromHours = w.FromHours,
48 |             @ToHours = w.ToHours
49 |         FROM (VALUES
50 |             (1,  5,   -5,    7),
51 |             (2,  5,  -17,   -5),
52 |             (3, 10,  -65,  -17),
53 |             (4, 10, -161,  -65),
54 |             (5, 20, -329, -161)
55 |         ) AS w (SwitchValue, TopCount, FromHours, ToHours)
56 |         WHERE w.SwitchValue = @Switch
57 |
58 |         -- INNER JOIN: the DateTime filter rejects stars without a matching
59 |         -- article row anyway, so an outer join would add nothing.
60 |         SELECT TOP (@TopCount)
61 |             s.NewsArticleID AS [ID],
62 |             COUNT(*) AS [StarCount],
63 |             n.Reporter,
64 |             n.Article,
65 |             n.Summary,
66 |             n.URL,
67 |             n.DateTime
68 |         FROM NewsFeed_StarredArticles AS s
69 |         INNER JOIN NewsFeed_NewsArticles AS n
70 |             ON n.ID = s.NewsArticleID
71 |         WHERE n.DateTime BETWEEN DATEADD(hour, @FromHours, GETDATE())
72 |                              AND DATEADD(hour, @ToHours, GETDATE())
73 |         GROUP BY
74 |             s.NewsArticleID,
75 |             n.Reporter,
76 |             n.Article,
77 |             n.Summary,
78 |             n.URL,
79 |             n.DateTime
80 |         ORDER BY [StarCount] DESC, n.DateTime DESC
81 |     END
82 |     /* Display Trends */
83 |     ELSE IF @Switch = 6
84 |     BEGIN
85 |         SELECT Tag
86 |         FROM NewsFeed_Tags
87 |         ORDER BY Tag ASC
88 |     END
89 |     /* Check Search Limit */
90 |     ELSE IF @Switch = 7
91 |     BEGIN
92 |         -- DATEDIFF counts minute boundaries crossed, not elapsed minutes.
93 |         SELECT COUNT(*) AS [Count]
94 |         FROM [NewsFeed_SearchHistory]
95 |         WHERE [IPAddress] = @UserIP
96 |             AND DATEDIFF(mi, [DateTime], GETDATE()) <= 60
97 |     END
98 |     /* Check Star Limit */
99 |     ELSE IF @Switch = 8
100 |     BEGIN
101 |         SELECT COUNT(*) AS [Count]
102 |         FROM NewsFeed_StarredArticles
103 |         WHERE UserIPAddress = @UserIP
104 |             AND DATEDIFF(hh, DateTime, GETDATE()) <= 24
105 |     END
106 |     /* Top Articles - Favorite Articles */
107 |     ELSE IF @Switch = 9
108 |     BEGIN
109 |         -- INNER JOIN: only articles with a star from @UserIP can pass the filter.
110 |         SELECT
111 |             n.ID,
112 |             n.Reporter,
113 |             n.Article,
114 |             n.URL,
115 |             n.DateTime,
116 |             n.Summary,
117 |             s.UserIPAddress
118 |         FROM NewsFeed_NewsArticles AS n
119 |         INNER JOIN NewsFeed_StarredArticles AS s
120 |             ON n.ID = s.NewsArticleID
121 |         WHERE s.UserIPAddress = @UserIP
122 |         ORDER BY n.DateTime DESC
123 |     END
124 |     /* Display Available Star Count */
125 |     ELSE IF @Switch = 10
126 |     BEGIN
127 |         SELECT ID
128 |         FROM NewsFeed_StarredArticles
129 |         WHERE UserIPAddress = @UserIP
130 |             AND DATEDIFF(hh, DateTime, GETDATE()) <= 24
131 |     END
132 | END
133 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_AddComment.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_AddComment.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_AddStarredArticle.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_AddStarredArticle.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_DeleteBotStarred.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_DeleteBotStarred.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_DisplayComments.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_DisplayComments.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_LogSearch.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_LogSearch.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/NewsFeed_MainFeed.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/NewsFeed_MainFeed.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/StoredProcedures/RSSFunctionApp.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KylerCondran/RSSFeedAggregator/d0b95bdd45d299dab54953301095b6cf5a588089/RSSFeedAggregator/StoredProcedures/RSSFunctionApp.sql
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_ArticleComments.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Comment rows keyed by an auto-incrementing ID (clustered primary key).
11 | -- NOTE(review): NewsArticleID presumably refers to NewsFeed_NewsArticles.ID, but no foreign key enforces it - confirm.
12 | CREATE TABLE [NewsFeed_ArticleComments]
13 | (
14 |     [ID]            INT IDENTITY(1, 1) NOT NULL,
15 |     [NewsArticleID] INT                NOT NULL,
16 |     [UserIPAddress] NVARCHAR(25)       NOT NULL,
17 |     [Comment]       NVARCHAR(200)      NULL,
18 |     [DateTime]      DATETIME           NOT NULL,
19 |     [UserName]      NVARCHAR(15)       NULL,
20 |     CONSTRAINT [PK_NewsFeed_ArticleComments] PRIMARY KEY CLUSTERED ([ID] ASC)
21 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
22 |         ON [PRIMARY]
23 | )
24 | ON [PRIMARY]
25 | GO
26 |
27 |
28 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_BannedTrends.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Single-column list of trend names. The trend text itself is the clustered
11 | -- primary key, so each value can be stored only once.
12 | CREATE TABLE [NewsFeed_BannedTrends]
13 | (
14 |     [Trend] NVARCHAR(25) NOT NULL,
15 |     CONSTRAINT [PK_NewsFeed_BannedTrends] PRIMARY KEY CLUSTERED ([Trend] ASC)
16 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
17 |         ON [PRIMARY]
18 | )
19 | ON [PRIMARY]
20 | GO
21 |
22 |
23 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_NewsArticles.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Ingested news articles. URL is the clustered primary key, so the same URL
11 | -- can be stored only once.
12 | -- ID is the value NewsFeed_StarredArticles.NewsArticleID is joined on. The
13 | -- IDENTITY property alone neither enforces uniqueness nor creates an index,
14 | -- so ID carries its own UNIQUE constraint to keep those joins one-to-one
15 | -- and seekable.
16 | CREATE TABLE [NewsFeed_NewsArticles]
17 | (
18 |     [ID]           INT IDENTITY(1, 1) NOT NULL,
19 |     [Reporter]     NVARCHAR(100)      NOT NULL,
20 |     [Article]      NVARCHAR(250)      NOT NULL,
21 |     [URL]          NVARCHAR(450)      NOT NULL,
22 |     [DateTime]     DATETIME           NOT NULL,
23 |     [Summary]      NVARCHAR(2000)     NULL,
24 |     [StarBotScore] INT                NULL,
25 |     [Image]        NVARCHAR(450)      NULL,
26 |     CONSTRAINT [PK_NewsFeed_NewsArticles] PRIMARY KEY CLUSTERED ([URL] ASC)
27 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
28 |         ON [PRIMARY],
29 |     CONSTRAINT [UQ_NewsFeed_NewsArticles_ID] UNIQUE NONCLUSTERED ([ID] ASC)
30 |         ON [PRIMARY]
31 | )
32 | ON [PRIMARY]
33 | GO
34 |
35 |
36 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_SearchHistory.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Search log: the caller's IP address, the search term and a timestamp per row,
11 | -- keyed by an auto-incrementing ID (clustered primary key).
12 | CREATE TABLE [NewsFeed_SearchHistory]
13 | (
14 |     [ID]         INT IDENTITY(1, 1) NOT NULL,
15 |     [IPAddress]  NVARCHAR(100)      NOT NULL,
16 |     [SearchTerm] NVARCHAR(100)      NOT NULL,
17 |     [DateTime]   DATETIME           NOT NULL,
18 |     CONSTRAINT [PK_NewsFeed_SearchHistory] PRIMARY KEY CLUSTERED ([ID] ASC)
19 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
20 |         ON [PRIMARY]
21 | )
22 | ON [PRIMARY]
23 | GO
24 |
25 |
26 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_StarredArticles.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Stars given to articles: an article ID, the starring IP address and a
11 | -- timestamp per row, keyed by an auto-incrementing ID (clustered primary key).
12 | -- NOTE(review): NewsArticleID is matched against NewsFeed_NewsArticles.ID by
13 | -- queries, but no foreign key enforces that link - confirm it is intentional.
14 | CREATE TABLE [NewsFeed_StarredArticles]
15 | (
16 |     [ID]            INT IDENTITY(1, 1) NOT NULL,
17 |     [NewsArticleID] INT                NOT NULL,
18 |     [UserIPAddress] NVARCHAR(25)       NOT NULL,
19 |     [DateTime]      DATETIME           NOT NULL,
20 |     CONSTRAINT [PK_NewsFeed_StarredArticles] PRIMARY KEY CLUSTERED ([ID] ASC)
21 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
22 |         ON [PRIMARY]
23 | )
24 | ON [PRIMARY]
25 | GO
26 |
27 |
28 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/NewsFeed_Tags.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Single-column list of tags. The tag text itself is the clustered primary
11 | -- key, so each tag can be stored only once.
12 | CREATE TABLE [NewsFeed_Tags]
13 | (
14 |     [Tag] NVARCHAR(25) NOT NULL,
15 |     CONSTRAINT [PK_NewsFeed_Tags] PRIMARY KEY CLUSTERED ([Tag] ASC)
16 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
17 |         ON [PRIMARY]
18 | )
19 | ON [PRIMARY]
20 | GO
21 |
22 |
23 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/RSSFunctionApp_ErrorLogs.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Error log rows keyed by an auto-incrementing ID (clustered primary key).
11 | -- Source is NVARCHAR(MAX), hence the TEXTIMAGE_ON clause for large-value storage.
12 | -- NOTE(review): Handled is an INT; presumably used as a 0/1 flag - confirm.
13 | CREATE TABLE [RSSFunctionApp_ErrorLogs]
14 | (
15 |     [ID]           INT IDENTITY(1, 1) NOT NULL,
16 |     [Type]         NVARCHAR(2000)     NOT NULL,
17 |     [Message]      NVARCHAR(2000)     NOT NULL,
18 |     [Source]       NVARCHAR(MAX)      NOT NULL,
19 |     [FeedName]     NVARCHAR(100)      NOT NULL,
20 |     [ArticleTitle] NVARCHAR(250)      NULL,
21 |     [Subroutine]   NVARCHAR(100)      NOT NULL,
22 |     [DateTime]     DATETIME           NOT NULL,
23 |     [Handled]      INT                NOT NULL,
24 |     CONSTRAINT [PK_RSSFunctionApp_ErrorLogs] PRIMARY KEY CLUSTERED ([ID] ASC)
25 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
26 |         ON [PRIMARY]
27 | )
28 | ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
29 | GO
30 |
31 |
32 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/RSSFunctionApp_IngestionLogs.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Ingestion log rows: uploaded, discarded and total counts plus an uploaded
11 | -- percentage for a named feed, keyed by an auto-incrementing ID (clustered primary key).
12 | -- NOTE(review): UploadedPercentage is stored as supplied; whether it is a
13 | -- 0-1 fraction or a 0-100 value is not visible here - confirm.
14 | CREATE TABLE [RSSFunctionApp_IngestionLogs]
15 | (
16 |     [ID]                 INT IDENTITY(1, 1) NOT NULL,
17 |     [FeedName]           NVARCHAR(100)      NOT NULL,
18 |     [UploadedCount]      INT                NOT NULL,
19 |     [DiscardedCount]     INT                NOT NULL,
20 |     [TotalCount]         INT                NOT NULL,
21 |     [UploadedPercentage] FLOAT              NOT NULL,
22 |     [DateTime]           DATETIME           NOT NULL,
23 |     CONSTRAINT [PK_RSSFunctionApp_IngestionLogs] PRIMARY KEY CLUSTERED ([ID] ASC)
24 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
25 |         ON [PRIMARY]
26 | )
27 | ON [PRIMARY]
28 | GO
29 |
30 |
31 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/RSSFunctionApp_RSSFeeds.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- RSS feed definitions. URL is the clustered primary key, so the same URL can
11 | -- be stored only once; ID is an auto-incrementing column with no uniqueness
12 | -- constraint of its own.
13 | -- NOTE(review): Enabled is an INT while the other switches are BIT;
14 | -- presumably a 0/1 flag - confirm.
15 | CREATE TABLE [RSSFunctionApp_RSSFeeds]
16 | (
17 |     [ID]               INT IDENTITY(1, 1) NOT NULL,
18 |     [FeedName]         NVARCHAR(50)       NOT NULL,
19 |     [URL]              NVARCHAR(100)      NOT NULL,
20 |     [ReadTier]         INT                NOT NULL,
21 |     [Enabled]          INT                NOT NULL,
22 |     [Category]         NVARCHAR(5)        NULL,
23 |     [CompareAll]       BIT                NULL,
24 |     [RemovePaywall]    BIT                NULL,
25 |     [IngestionLogging] BIT                NULL,
26 |     CONSTRAINT [PK_RSSFunctionApp_RSSFeeds] PRIMARY KEY CLUSTERED ([URL] ASC)
27 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
28 |         ON [PRIMARY]
29 | )
30 | ON [PRIMARY]
31 | GO
32 |
33 |
34 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/TableSchemas/TopArticles_StarbotKeywords.sql:
--------------------------------------------------------------------------------
1 | USE [Database]
2 | GO
3 |
4 | SET ANSI_NULLS ON
5 | GO
6 |
7 | SET QUOTED_IDENTIFIER ON
8 | GO
9 |
10 | -- Keyword list with an integer tier per keyword. The keyword text itself is
11 | -- the clustered primary key, so each keyword can be stored only once.
12 | CREATE TABLE [TopArticles_StarbotKeywords]
13 | (
14 |     [Keyword] NVARCHAR(25) NOT NULL,
15 |     [Tier]    INT          NOT NULL,
16 |     CONSTRAINT [PK_TopArticles_StarbotKeywords] PRIMARY KEY CLUSTERED ([Keyword] ASC)
17 |         WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
18 |         ON [PRIMARY]
19 | )
20 | ON [PRIMARY]
21 | GO
22 |
23 |
24 |
--------------------------------------------------------------------------------
/RSSFeedAggregator/host.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0",
3 | "logging": {
4 | "applicationInsights": {
5 | "samplingSettings": {
6 | "isEnabled": true,
7 | "excludedTypes": "Request"
8 | },
9 | "enableLiveMetricsFilters": true
10 | }
11 | }
12 | }
--------------------------------------------------------------------------------