├── .gitattributes ├── .gitignore ├── LICENSE.md ├── LinkCrawler ├── LinkCrawler.Tests │ ├── LinkCrawler.Tests.csproj │ ├── LinkCrawlerTests.cs │ ├── Properties │ │ └── AssemblyInfo.cs │ ├── UtilsTests │ │ ├── ClientsTests │ │ │ └── SlackClientTests.cs │ │ ├── ExtensionsTests │ │ │ ├── HttpStatusCodeExtensionsTests.cs │ │ │ ├── RegexExtensionsTest.cs │ │ │ └── StringExtensionsTests.cs │ │ ├── OutputsTests │ │ │ └── SlackOutputTests.cs │ │ └── ParsersTests │ │ │ └── ValidUrlParserTests.cs │ └── packages.config ├── LinkCrawler.sln └── LinkCrawler │ ├── App.config │ ├── LinkCrawler.cs │ ├── LinkCrawler.csproj │ ├── Models │ ├── IResponseModel.cs │ ├── LinkModel.cs │ ├── RequestModel.cs │ └── ResponseModel.cs │ ├── Program.cs │ ├── Properties │ └── AssemblyInfo.cs │ ├── Utils │ ├── Clients │ │ ├── ISlackClient.cs │ │ └── SlackClient.cs │ ├── Extensions │ │ ├── HttpStatusCodeExtensions.cs │ │ ├── RegexExtensions.cs │ │ ├── RestRequestExtensions.cs │ │ ├── RestResponseExtensions.cs │ │ ├── StringExtensions.cs │ │ └── UriExtensions.cs │ ├── Helpers │ │ ├── ConsoleHelper.cs │ │ └── MarkupHelpers.cs │ ├── Outputs │ │ ├── ConsoleOutput.cs │ │ ├── CsvOutput.cs │ │ ├── IOutput.cs │ │ └── SlackOutput.cs │ ├── Parsers │ │ ├── IValidUrlParser.cs │ │ └── ValidUrlParser.cs │ ├── Settings │ │ ├── Constants.cs │ │ ├── ISettings.cs │ │ ├── MockSettings.cs │ │ └── Settings.cs │ └── StructureMapRegistry.cs │ └── packages.config ├── LinkCrawler_DotNet_7 └── LinkCrawler │ ├── LinkCrawler.Test │ ├── LinkCrawler.Test.csproj │ ├── UnitTest1.cs │ └── Usings.cs │ ├── LinkCrawler.sln │ └── LinkCrawler │ ├── LinkCrawler.csproj │ └── Program.cs └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 
3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 
42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | 24 | # Visual Studio 2015 cache/options directory 25 | .vs/ 26 | # Uncomment if you have tasks that create the project's static files in wwwroot 27 | #wwwroot/ 28 | 29 | # MSTest test Results 30 | [Tt]est[Rr]esult*/ 31 | [Bb]uild[Ll]og.* 32 | 33 | # NUNIT 34 | *.VisualState.xml 35 | TestResult.xml 36 | 37 | # Build Results of an ATL Project 38 | [Dd]ebugPS/ 39 | [Rr]eleasePS/ 40 | dlldata.c 41 | 42 | # DNX 43 | project.lock.json 44 | artifacts/ 45 | 46 | *_i.c 47 | *_p.c 48 | *_i.h 49 | *.ilk 50 | *.meta 51 | *.obj 52 | *.pch 53 | *.pdb 54 | *.pgc 55 | *.pgd 56 | *.rsp 57 | *.sbr 58 | *.tlb 59 | *.tli 60 | *.tlh 61 | *.tmp 62 | *.tmp_proj 63 | *.log 64 | *.vspscc 65 | *.vssscc 66 | .builds 67 | *.pidb 68 | *.svclog 69 | *.scc 70 | 71 | # Chutzpah Test files 72 | _Chutzpah* 73 | 74 | # Visual C++ cache files 75 | ipch/ 76 | *.aps 77 | *.ncb 78 | *.opendb 79 | *.opensdf 80 | *.sdf 81 | *.cachefile 82 | 83 | # Visual Studio profiler 84 | *.psess 85 | *.vsp 86 | *.vspx 87 | *.sap 88 | 89 | # TFS 2012 Local Workspace 90 | $tf/ 91 | 92 | # Guidance Automation Toolkit 93 | *.gpState 94 | 95 | # ReSharper is a .NET coding add-in 96 | _ReSharper*/ 97 | *.[Rr]e[Ss]harper 98 | *.DotSettings.user 99 | 100 | # JustCode is a .NET coding add-in 101 | .JustCode 102 | 103 | # TeamCity is a build add-in 104 | _TeamCity* 105 | 106 | # DotCover is a Code Coverage Tool 107 | *.dotCover 108 | 109 | # NCrunch 110 | _NCrunch_* 111 | .*crunch*.local.xml 112 | nCrunchTemp_* 113 | 114 | # MightyMoose 115 | *.mm.* 116 | AutoTest.Net/ 117 | 118 | # Web workbench (sass) 119 | .sass-cache/ 120 | 121 | # Installshield output folder 122 | [Ee]xpress/ 123 | 124 | # DocProject is 
a documentation generator add-in 125 | DocProject/buildhelp/ 126 | DocProject/Help/*.HxT 127 | DocProject/Help/*.HxC 128 | DocProject/Help/*.hhc 129 | DocProject/Help/*.hhk 130 | DocProject/Help/*.hhp 131 | DocProject/Help/Html2 132 | DocProject/Help/html 133 | 134 | # Click-Once directory 135 | publish/ 136 | 137 | # Publish Web Output 138 | *.[Pp]ublish.xml 139 | *.azurePubxml 140 | # TODO: Comment the next line if you want to checkin your web deploy settings 141 | # but database connection strings (with potential passwords) will be unencrypted 142 | *.pubxml 143 | *.publishproj 144 | 145 | # NuGet Packages 146 | *.nupkg 147 | # The packages folder can be ignored because of Package Restore 148 | **/packages/* 149 | # except build/, which is used as an MSBuild target. 150 | !**/packages/build/ 151 | # Uncomment if necessary however generally it will be regenerated when needed 152 | #!**/packages/repositories.config 153 | # NuGet v3's project.json files produces more ignoreable files 154 | *.nuget.props 155 | *.nuget.targets 156 | 157 | # Microsoft Azure Build Output 158 | csx/ 159 | *.build.csdef 160 | 161 | # Microsoft Azure Emulator 162 | ecf/ 163 | rcf/ 164 | 165 | # Microsoft Azure ApplicationInsights config file 166 | ApplicationInsights.config 167 | 168 | # Windows Store app package directory 169 | AppPackages/ 170 | BundleArtifacts/ 171 | 172 | # Visual Studio cache files 173 | # files ending in .cache can be ignored 174 | *.[Cc]ache 175 | # but keep track of directories ending in .cache 176 | !*.[Cc]ache/ 177 | 178 | # Others 179 | ClientBin/ 180 | ~$* 181 | *~ 182 | *.dbmdl 183 | *.dbproj.schemaview 184 | *.pfx 185 | *.publishsettings 186 | node_modules/ 187 | orleans.codegen.cs 188 | 189 | # RIA/Silverlight projects 190 | Generated_Code/ 191 | 192 | # Backup & report files from converting an old project file 193 | # to a newer Visual Studio version. 
Backup files are not needed, 194 | # because we have git ;-) 195 | _UpgradeReport_Files/ 196 | Backup*/ 197 | UpgradeLog*.XML 198 | UpgradeLog*.htm 199 | 200 | # SQL Server files 201 | *.mdf 202 | *.ldf 203 | 204 | # Business Intelligence projects 205 | *.rdl.data 206 | *.bim.layout 207 | *.bim_*.settings 208 | 209 | # Microsoft Fakes 210 | FakesAssemblies/ 211 | 212 | # GhostDoc plugin setting file 213 | *.GhostDoc.xml 214 | 215 | # Node.js Tools for Visual Studio 216 | .ntvs_analysis.dat 217 | 218 | # Visual Studio 6 build log 219 | *.plg 220 | 221 | # Visual Studio 6 workspace options file 222 | *.opt 223 | 224 | # Visual Studio LightSwitch build output 225 | **/*.HTMLClient/GeneratedArtifacts 226 | **/*.DesktopClient/GeneratedArtifacts 227 | **/*.DesktopClient/ModelManifest.xml 228 | **/*.Server/GeneratedArtifacts 229 | **/*.Server/ModelManifest.xml 230 | _Pvt_Extensions 231 | 232 | # Paket dependency manager 233 | .paket/paket.exe 234 | 235 | # FAKE - F# Make 236 | .fake/ 237 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 henrik molnes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LinkCrawler/LinkCrawler.Tests/LinkCrawler.Tests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {964A55BE-AC53-4E97-BAA9-55323323E3E3} 8 | Library 9 | Properties 10 | LinkCrawler.Tests 11 | LinkCrawler.Tests 12 | v4.5.2 13 | 512 14 | 15 | 16 | true 17 | full 18 | false 19 | bin\Debug\ 20 | DEBUG;TRACE 21 | prompt 22 | 4 23 | 24 | 25 | pdbonly 26 | true 27 | bin\Release\ 28 | TRACE 29 | prompt 30 | 4 31 | 32 | 33 | 34 | ..\packages\AutoFixture.4.5.0\lib\net452\AutoFixture.dll 35 | 36 | 37 | ..\packages\AutoFixture.AutoMoq.4.5.0\lib\net452\AutoFixture.AutoMoq.dll 38 | 39 | 40 | ..\packages\Fare.2.1.1\lib\net35\Fare.dll 41 | 42 | 43 | ..\packages\Moq.4.2.1510.2205\lib\net40\Moq.dll 44 | True 45 | 46 | 47 | ..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll 48 | True 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | App.config 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | {db53303b-f9fb-4d77-b656-d05db0420e6a} 81 | LinkCrawler 82 | 83 | 84 | 85 | 86 | 87 | 88 | 95 | -------------------------------------------------------------------------------- /LinkCrawler/LinkCrawler.Tests/LinkCrawlerTests.cs: -------------------------------------------------------------------------------- 1 | using LinkCrawler.Models; 2 | using LinkCrawler.Utils.Clients; 3 | 
using LinkCrawler.Utils.Outputs;
using LinkCrawler.Utils.Parsers;
using LinkCrawler.Utils.Settings;
using Moq;
using NUnit.Framework;
using System.Linq;

namespace LinkCrawler.Tests
{
    /// <summary>
    /// Exercises the collaboration between <see cref="LinkCrawler"/> and its
    /// dependencies (outputs, URL parser, settings).
    /// </summary>
    /// <remarks>
    /// The fixture builds the crawler from the real <see cref="Settings"/> class,
    /// so it relies on whatever configuration that class reads.
    /// </remarks>
    [TestFixture]
    public class LinkCrawlerTests
    {
        public LinkCrawler LinkCrawler { get; set; }
        public Mock<ISlackClient> MockSlackClient { get; set; }
        public Settings Settings { get; set; }

        /// <summary>
        /// Creates a crawler whose only output is a <see cref="SlackOutput"/>
        /// backed by a mocked Slack client.
        /// </summary>
        [SetUp]
        public void SetUp()
        {
            MockSlackClient = new Mock<ISlackClient>();
            Settings = new Settings();
            var parser = new ValidUrlParser(Settings);
            var outputs = new IOutput[]
            {
                new SlackOutput(MockSlackClient.Object),
            };

            LinkCrawler = new LinkCrawler(outputs, parser, Settings);
        }

        /// <summary>
        /// A response that is NOT successful must be forwarded to the Slack client.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method name says "SucessResponse" but the arranged
        /// response has IsSuccess == false. The name is kept so existing test
        /// filters and reports keep working; consider renaming it to
        /// "..._FailedResponse_NotifySlack".
        /// </remarks>
        [Test]
        public void WriteOutputAndNotifySlack_SucessResponse_NotifySlack()
        {
            var mockResponseModel = new Mock<IResponseModel>();
            mockResponseModel.Setup(x => x.IsSuccess).Returns(false);

            LinkCrawler.WriteOutput(mockResponseModel.Object);

            MockSlackClient.Verify(m => m.NotifySlack(mockResponseModel.Object));
        }

        /// <summary>
        /// A link embedded in the response markup must end up in the crawler's URL list.
        /// </summary>
        [Test]
        public void CrawlForLinksInResponse_ResponseModelWithMarkup_ValidUrlFoundInMarkup()
        {
            var url = "http://www.github.com";
            // The anchor element carries the {0} placeholder; without it the URL
            // never appears in the markup and there is nothing for the crawler to find.
            var markup = string.Format("this is html document <a href='{0}'>a valid link</a>", url);
            var mockResponseModel = new Mock<IResponseModel>();
            mockResponseModel.Setup(x => x.Markup).Returns(markup);

            LinkCrawler.CrawlForLinksInResponse(mockResponseModel.Object);

            Assert.That(LinkCrawler.UrlList.Any(l => l.Address == url));
        }
    }
}
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using
System.Runtime.InteropServices;

// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("LinkCrawler.Tests")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("LinkCrawler.Tests")]
[assembly: AssemblyCopyright("Copyright © 2016")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("964a55be-ac53-4e97-baa9-55323323e3e3")]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ClientsTests/SlackClientTests.cs:
--------------------------------------------------------------------------------
using LinkCrawler.Utils.Clients;
using NUnit.Framework;
using LinkCrawler.Utils.Settings;

namespace LinkCrawler.Tests.UtilsTests.ClientsTests
{
    /// <summary>
    /// Verifies that <see cref="SlackClient"/> exposes the values supplied by
    /// <see cref="MockSettings"/>, with and without a web hook URL.
    /// </summary>
    [TestFixture]
    public class SlackClientTests
    {
        //MethodName_StateUnderTest_ExpectedBehaviour
        [Test]
        public void SlackClient_InstantiationWithWebHookUrl_InstantiatedCorrectlyWithWebHookUrl()
        {
            var settings = new MockSettings(true);
            var client = new SlackClient(settings);

            Assert.AreEqual(@"https://hooks.slack.com/services/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", client.WebHookUrl);
            Assert.AreEqual("Homer Bot", client.BotName);
            Assert.AreEqual(":homer:", client.BotIcon);
            Assert.AreEqual("*Doh! There is a link not working* Url: {0} Statuscode: {1} The link is placed on this page: {2}", client.MessageFormat);
            Assert.IsTrue(client.HasWebHookUrl);
        }

        [Test]
        public void SlackClient_InstantiationWithoutWebHookUrl_InstantiatedCorrectlyWithoutWebHookUrl()
        {
            var settings = new MockSettings(false);
            var client = new SlackClient(settings);

            Assert.AreEqual("", client.WebHookUrl);
            Assert.AreEqual("Homer Bot", client.BotName);
            Assert.AreEqual(":homer:", client.BotIcon);
            Assert.AreEqual("*Doh! There is a link not working* Url: {0} Statuscode: {1} The link is placed on this page: {2}", client.MessageFormat);
            Assert.IsFalse(client.HasWebHookUrl);
        }
    }
}
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/HttpStatusCodeExtensionsTests.cs:
--------------------------------------------------------------------------------
using System.Net;
using LinkCrawler.Utils.Extensions;
using NUnit.Framework;

namespace LinkCrawler.Tests.UtilsTests.ExtensionsTests
{
    /// <summary>
    /// Table-driven checks of <c>HttpStatusCode.IsSuccess(config)</c>: exact codes,
    /// comma-separated lists and case-insensitive 'x' wildcards.
    /// </summary>
    [TestFixture]
    public class HttpStatusCodeExtensionsTests
    {
        [TestCase(HttpStatusCode.OK, "200", true, Description = "Simple case of exact match")]
        [TestCase(HttpStatusCode.OK, "404", false, Description = "Simple case of no exact match")]
        [TestCase(HttpStatusCode.OK, "200,404", true, Description = "More complex: two codes in config")]
        [TestCase(HttpStatusCode.OK, "200, 404", true, Description = "Space after comma is okay")]
        [TestCase(HttpStatusCode.OK, "2xx", true, Description = "Use wildcards in code number")]
        [TestCase(HttpStatusCode.OK, "2Xx", true, Description = "X wildcard is not case sensitive")]
        [TestCase(HttpStatusCode.NotFound, "xX4", true, Description = "Wildcard can be used for any digit")]
        [TestCase(HttpStatusCode.OK, "2xx,xX0", true, Description = "Multiple wildcard codes allowed")]
        public void Will_match_a_single_code_exactly(HttpStatusCode givenCode, string givenConfig, bool expectedOutcome)
        {
            var result = givenCode.IsSuccess(givenConfig);

            Assert.That(result, Is.EqualTo(expectedOutcome));
        }
    }
}
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/RegexExtensionsTest.cs:
--------------------------------------------------------------------------------
using LinkCrawler.Utils.Extensions;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace LinkCrawler.Tests.UtilsTests.ExtensionsTests
{
    /// <summary>
    /// Tests for <c>RegexExtensions.IsNotMatch</c>.
    /// </summary>
    [TestFixture]
    public class RegexExtensionsTest
    {
        // The [Test] attribute was missing, so NUnit never discovered or ran this method.
        //
        // NOTE(review): the pattern below only matches inputs that start with
        // "http://", "https://", "www" or one/two slashes, and the sample URL
        // starts with none of them. If IsNotMatch is the negation of
        // Regex.IsMatch (presumably - confirm against RegexExtensions), it returns
        // true here and the IsFalse expectation must be revisited.
        [Test]
        public void IsNotMatch_Should_Return_False()
        {
            string regex = "(^http[s]?:\\/{2})|(^www)|(^\\/{1,2})";
            string url = "website.com:///podcast/";

            bool expression = RegexExtensions.IsNotMatch(new System.Text.RegularExpressions.Regex(regex), url);

            Assert.IsFalse(expression);
        }
    }
}
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/StringExtensionsTests.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Extensions; 2 | using NUnit.Framework; 3 | 4 | namespace LinkCrawler.Tests.UtilsTests.ExtensionsTests 5 | { 6 |
[TestFixture]
    /// <summary>
    /// Tests for the string extension methods ToBool, StartsWithIgnoreCase and TrimEnd.
    /// </summary>
    public class StringExtensionsTests
    {
        //MethodName_StateUnderTest_ExpectedBehaviour
        [Test]
        public void ToBool_StringValueIsTrue_BooleanValueIsTrue()
        {
            var stringValue = "true";

            Assert.IsTrue(stringValue.ToBool());
        }

        [Test]
        public void ToBool_StringValueIsFalse_BooleanValueIsFalse()
        {
            var stringValue = "false";

            Assert.IsFalse(stringValue.ToBool());
        }

        [Test]
        public void ToBool_StringValueIsFoobar_BooleanValueIsFalse()
        {
            var stringValue = "Foobar";

            Assert.IsFalse(stringValue.ToBool());
        }

        [Test]
        public void ToBool_StringValueIsEmpty_BooleanValueIsFalse()
        {
            var stringValue = "";

            Assert.IsFalse(stringValue.ToBool());
        }

        [Test]
        public void ToBool_StringValueIsNull_BooleanValueIsFalse()
        {
            string stringValue = null;

            Assert.IsFalse(stringValue.ToBool());
        }

        [Test]
        public void StartsWithIgnoreCase_SameLetterAndSameCase_True()
        {
            var word = "Foobar";
            var letter = "F";

            Assert.IsTrue(word.StartsWithIgnoreCase(letter));
        }

        [Test]
        public void StartsWithIgnoreCase_SameLetterAndDifferentCase_True()
        {
            var word = "Foobar";
            var letter = "f";

            Assert.IsTrue(word.StartsWithIgnoreCase(letter));
        }

        [Test]
        public void StartsWithIgnoreCase_EmptyWord_False()
        {
            var word = "";
            var letter = "A";

            Assert.IsFalse(word.StartsWithIgnoreCase(letter));
        }

        [Test]
        public void StartsWithIgnoreCase_LetterIsBlankSpace_False()
        {
            var word = "Foobar";
            var letter = " ";

            Assert.IsFalse(word.StartsWithIgnoreCase(letter));
        }

        [Test]
        public void StartsWithIgnoreCase_LetterIsNull_False()
        {
            var word = "Foobar";
            string letter = null;

            Assert.IsFalse(word.StartsWithIgnoreCase(letter));
        }

        [Test]
        public void StartsWithIgnoreCase_WordIsNull_False()
        {
            string word = null;
            var letter = "F";

            Assert.IsFalse(word.StartsWithIgnoreCase(letter));
        }

        // NOTE(review): the "_True" suffix is misleading - the expected result is
        // false. The name is kept so test filters/history are not broken.
        [Test]
        public void StartsWithIgnoreCase_DifferentLetterAndDifferentCase_True()
        {
            var word = "Foobar";
            var letter = "a";

            Assert.IsFalse(word.StartsWithIgnoreCase(letter));
        }

        // NOTE(review): the "_True" suffix is misleading - the expected result is false.
        [Test]
        public void StartsWithIgnoreCase_DifferentLetterAndSameCase_True()
        {
            var word = "Foobar";
            var letter = "A";

            Assert.IsFalse(word.StartsWithIgnoreCase(letter));
        }

        [Test]
        public void TrimEnd_InputNull_Null()
        {
            string input = null;

            var actual = input.TrimEnd("");

            Assert.IsNull(actual);
        }

        [Test]
        public void TrimEnd_InputEndsWithSuffix_RemovesSuffix()
        {
            var actual = "friend".TrimEnd("end");

            Assert.AreEqual("fri", actual);
        }

        [Test]
        public void TrimEnd_InputEndsWithSuffixDifferentCase_ReturnsOriginal()
        {
            var actual = "friEND".TrimEnd("end");

            Assert.AreEqual("friEND", actual);
        }

        // NOTE(review): despite "DifferentCase" in the name, input and suffix use
        // the same case here; the scenario is "suffix equals the whole input".
        [Test]
        public void TrimEnd_InputEndsWithSuffixDifferentCase_ReturnsEmptyString()
        {
            var actual = "friend".TrimEnd("friend");

            Assert.AreEqual(string.Empty, actual);
        }
    }
}
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/OutputsTests/SlackOutputTests.cs:
--------------------------------------------------------------------------------
using AutoFixture;
using NUnit.Framework;
using LinkCrawler.Utils.Outputs;
using AutoFixture.AutoMoq;
using LinkCrawler.Utils.Clients;
using LinkCrawler.Models;
using Moq;

namespace LinkCrawler.Tests.UtilsTests.OutputsTests
{
    /// <summary>
    /// Verifies that <see cref="SlackOutput"/> forwards errors to its Slack client.
    /// </summary>
    [TestFixture]
    public class SlackOutputTests
    {
        private Fixture _fixture;

        // NOTE(review): "SetUup" is a typo for "SetUp"; NUnit finds the method
        // through the attribute, so the name is left unchanged.
        [SetUp]
        public void SetUup()
        {
            _fixture = new Fixture();
            _fixture.Customize(new AutoMoqCustomization());
        }

        [Test]
        public void WriteError()
        {
            // The generic type arguments had been stripped from these calls, which
            // left the statements syntactically invalid. They are restored from how
            // the values are used below (Setup/VerifyAll on the client, WriteError
            // on the output, NotifySlack taking the response model).
            // NOTE(review): IResponseModel is presumed for the last call - confirm.
            var slackClient = _fixture.Freeze<Mock<ISlackClient>>();
            var slackOutput = _fixture.Freeze<SlackOutput>();
            var responseModel = _fixture.Create<IResponseModel>();

            slackClient.Setup(x => x.NotifySlack(responseModel));

            slackOutput.WriteError(responseModel);

            slackClient.VerifyAll();
        }
    }
}
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ParsersTests/ValidUrlParserTests.cs:
--------------------------------------------------------------------------------
using LinkCrawler.Utils.Parsers;
using LinkCrawler.Utils.Settings;
using NUnit.Framework;

namespace LinkCrawler.Tests.UtilsTests.ParsersTests
{
    /// <summary>
    /// Tests <see cref="ValidUrlParser.Parse"/> for absolute, scheme-less and
    /// relative URLs, using a parser built from the real <see cref="Settings"/>.
    /// </summary>
    [TestFixture]
    public class ValidUrlParserTests
    {
        public ValidUrlParser ValidUrlParser { get; set; }

        [SetUp]
        public void SetUp()
        {
            ValidUrlParser = new ValidUrlParser(new Settings());
        }

        [Test]
        public void Parse_CompleteValidUrl_True()
        {
            var url = "http://www.github.com";
            string parsed;

            var result = ValidUrlParser.Parse(url, out parsed);

            Assert.That(result, Is.True);
            Assert.That(parsed, Is.EqualTo(url));
        }

        [Test]
        public void Parse_UrlNoScheme_True()
        {
            var url = "//www.github.com";
            string parsed;

            var result = ValidUrlParser.Parse(url, out parsed);

            Assert.That(result, Is.True);
            // A scheme-less URL is expected to come back prefixed with "http:".
            var validUrl = "http:" + url;
            Assert.That(parsed, Is.EqualTo(validUrl));
        }

        [Test]
        public void Parse_UrlOnlyRelativePath_True()
        {
            var relativeUrl = "/relative/path";
            string parsed;

            var result = ValidUrlParser.Parse(relativeUrl, out parsed);

            Assert.That(result, Is.True);
            // A relative path is expected to be appended to the parser's base URL.
            var validUrl = string.Format("{0}{1}", ValidUrlParser.BaseUrl, relativeUrl);
            Assert.That(parsed, Is.EqualTo(validUrl));
        }
    }
}
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/packages.config:
--------------------------------------------------------------------------------
1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.sln:
--------------------------------------------------------------------------------
1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.24720.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkCrawler", "LinkCrawler\LinkCrawler.csproj", "{DB53303B-F9FB-4D77-B656-D05DB0420E6A}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkCrawler.Tests", "LinkCrawler.Tests\LinkCrawler.Tests.csproj", "{964A55BE-AC53-4E97-BAA9-55323323E3E3}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 |
GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 17 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}.Release|Any CPU.Build.0 = Release|Any CPU 20 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /LinkCrawler/LinkCrawler/App.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 |
5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /LinkCrawler/LinkCrawler/LinkCrawler.cs: -------------------------------------------------------------------------------- 1 | using LinkCrawler.Models; 2 | using LinkCrawler.Utils.Extensions; 3 | using LinkCrawler.Utils.Helpers; 4 | using LinkCrawler.Utils.Outputs; 5 | using LinkCrawler.Utils.Parsers; 6 | using LinkCrawler.Utils.Settings; 7 | using RestSharp; 8 | using System; 9 | using System.Collections.Generic; 10 | using System.Diagnostics; 11 | using System.Linq; 12 | 13 | namespace LinkCrawler 14 | { 15 | public class LinkCrawler 16 | { 17 | public string BaseUrl { get; set; } 18 | public bool CheckImages { get; set; } 19 | public RestRequest RestRequest { get; set; } 20 | public IEnumerable Outputs { get; set; } 21 | public IValidUrlParser ValidUrlParser { get; set; } 22 | public bool OnlyReportBrokenLinksToOutput { get; set; } 23 | public static List UrlList; 24 | private ISettings _settings; 25 | private Stopwatch _timer; 26 | 27 | public LinkCrawler(IEnumerable outputs, IValidUrlParser validUrlParser, ISettings settings) 28 | { 29 | BaseUrl = settings.BaseUrl; 30 | Outputs = outputs; 31 | ValidUrlParser = validUrlParser; 32 | CheckImages = settings.CheckImages; 33 | UrlList = new List(); 34 | RestRequest = new RestRequest(Method.GET).SetHeader("Accept", "*/*"); 35 | OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput; 36 | _settings = settings; 37 | _timer = new Stopwatch(); 38 | } 39 | 40 | public void Start() 41 | { 42 | _timer.Start(); 43 | UrlList.Add(new LinkModel(BaseUrl)); 44 | SendRequest(BaseUrl); 45 | } 46 | 47 | public void SendRequest(string crawlUrl, string referrerUrl = "") 48 | { 49 | var requestModel = new RequestModel(crawlUrl, referrerUrl, BaseUrl); 50 | var restClient = new 
RestClient(new Uri(crawlUrl)) { FollowRedirects = false }; 51 | 52 | restClient.ExecuteAsync(RestRequest, response => 53 | { 54 | if (response == null) 55 | return; 56 | 57 | var responseModel = new ResponseModel(response, requestModel, _settings); 58 | ProcessResponse(responseModel); 59 | }); 60 | } 61 | 62 | public void ProcessResponse(IResponseModel responseModel) 63 | { 64 | WriteOutput(responseModel); 65 | 66 | if (responseModel.ShouldCrawl) 67 | CrawlForLinksInResponse(responseModel); 68 | } 69 | 70 | public void CrawlForLinksInResponse(IResponseModel responseModel) 71 | { 72 | var linksFoundInMarkup = MarkupHelpers.GetValidUrlListFromMarkup(responseModel.Markup, ValidUrlParser, CheckImages); 73 | 74 | foreach (var url in linksFoundInMarkup) 75 | { 76 | lock (UrlList) 77 | { 78 | if (UrlList.Where(l => l.Address == url).Count() > 0) 79 | continue; 80 | 81 | UrlList.Add(new LinkModel(url)); 82 | } 83 | SendRequest(url, responseModel.RequestedUrl); 84 | } 85 | } 86 | 87 | public void WriteOutput(IResponseModel responseModel) 88 | { 89 | if (!responseModel.IsSuccess) 90 | { 91 | foreach (var output in Outputs) 92 | { 93 | output.WriteError(responseModel); 94 | } 95 | } 96 | else if (!OnlyReportBrokenLinksToOutput) 97 | { 98 | foreach (var output in Outputs) 99 | { 100 | output.WriteInfo(responseModel); 101 | } 102 | } 103 | 104 | CheckIfFinal(responseModel); 105 | } 106 | 107 | private void CheckIfFinal(IResponseModel responseModel) 108 | { 109 | lock (UrlList) 110 | { 111 | 112 | // First set the status code for the completed link (this will set "CheckingFinished" to true) 113 | foreach (LinkModel lm in UrlList.Where(l => l.Address == responseModel.RequestedUrl)) 114 | { 115 | lm.StatusCode = responseModel.StatusCodeNumber; 116 | } 117 | 118 | // Then check to see whether there are any pending links left to check 119 | if ((UrlList.Count > 1) && (UrlList.Where(l => l.CheckingFinished == false).Count() == 0)) 120 | { 121 | FinaliseSession(); 122 | } 123 | } 124 
/// <summary>
/// Stops the session timer and, when the PrintSummary setting is on, sends a
/// summary (number of links, elapsed milliseconds, link count per status code)
/// to every configured output via WriteInfo(string[]).
/// </summary>
private void FinaliseSession()
{
    _timer.Stop();
    if (!_settings.PrintSummary)
        return;

    var messages = new List<string>
    {
        "", // add blank line to differentiate summary from main output
        "Processing complete. Checked " + UrlList.Count() + " links in " + _timer.ElapsedMilliseconds.ToString() + "ms",
        "",
        " Status | # Links",
        " -------+--------"
    };

    // One row per distinct status code: key = LinkModel.StatusCode, elements = addresses.
    IEnumerable<IGrouping<int, string>> statusSummary = UrlList.GroupBy(link => link.StatusCode, link => link.Address);
    foreach (IGrouping<int, string> statusGroup in statusSummary)
    {
        messages.Add(String.Format(" {0} | {1,5}", statusGroup.Key, statusGroup.Count()));
    }

    foreach (var output in Outputs)
    {
        output.WriteInfo(messages.ToArray());
    }
}
/// <summary>
/// A single link tracked by the crawler: its address, the status code recorded
/// for it, and whether a status code has been recorded yet.
/// </summary>
public class LinkModel
{
    private int _code;

    /// <summary>Creates a link that has not been checked yet.</summary>
    /// <param name="address">Address of the link.</param>
    public LinkModel(string address)
    {
        CheckingFinished = false;
        Address = address;
    }

    /// <summary>Address given at construction; cannot be changed from outside.</summary>
    public string Address { get; private set; }

    /// <summary>False until StatusCode has been assigned at least once.</summary>
    public bool CheckingFinished { get; private set; }

    /// <summary>
    /// Status code recorded for the link. Assigning it (any value) also flips
    /// CheckingFinished to true.
    /// </summary>
    public int StatusCode
    {
        get { return _code; }
        set
        {
            CheckingFinished = true;
            _code = value;
        }
    }
}
/// <summary>
/// Tab-separated one-line description: numeric status, status name and requested URL.
/// For unsuccessful responses a second line with the referrer URL is appended.
/// </summary>
public override string ToString()
{
    return IsSuccess
        ? string.Format("{0}\t{1}\t{2}", StatusCodeNumber, StatusCode, RequestedUrl)
        : string.Format("{0}\t{1}\t{2}{3}\tReferer:\t{4}", StatusCodeNumber, StatusCode, RequestedUrl, Environment.NewLine, ReferrerUrl);
}
/// <summary>
/// Contract for a client that reports a response to Slack.
/// </summary>
public interface ISlackClient
{
    /// <summary>Slack webhook URL that notifications are posted to.</summary>
    string WebHookUrl { get; set; }

    /// <summary>Name sent as the "username" of the posted message (see SlackClient.NotifySlack).</summary>
    string BotName { get; set; }

    /// <summary>Value sent as the "icon_emoji" of the posted message (see SlackClient.NotifySlack).</summary>
    string BotIcon { get; set; }

    /// <summary>
    /// Composite format string for the message text. SlackClient fills it with
    /// {0} = requested URL, {1} = numeric status code, {2} = referrer URL.
    /// </summary>
    string MessageFormat { get; set; }

    /// <summary>Whether a webhook URL is configured; SlackClient reports false for a null or empty WebHookUrl.</summary>
    bool HasWebHookUrl { get; }

    /// <summary>Reports the given response to Slack. SlackClient does nothing when HasWebHookUrl is false.</summary>
    /// <param name="responseModel">Response to report.</param>
    void NotifySlack(IResponseModel responseModel);
}
public static class HttpStatusCodeExtensions
{
    /// <summary>
    /// Determines whether an HttpStatusCode represents a "success", based on the
    /// configuration string you pass in.
    /// You can pass literal codes like 100, 200, 404,
    /// or simple patterns using "x" as a single-digit wildcard, like 1xx or xx4.
    /// Matching is case-insensitive ("20X" equals "20x") and a pattern has to
    /// match the whole code: "4x" does not match 404.
    /// </summary>
    /// <param name="this">The HttpStatusCode to test</param>
    /// <param name="configuredCodes">
    /// Comma-separated list of codes and/or patterns. Null, empty or
    /// whitespace-only means that no code counts as success.
    /// </param>
    /// <returns>
    /// true when the numeric code equals one of the literals or matches one of
    /// the wildcard patterns; otherwise false.
    /// </returns>
    public static bool IsSuccess(this HttpStatusCode @this, string configuredCodes)
    {
        if (string.IsNullOrWhiteSpace(configuredCodes))
            return false;

        var codeNumberAsString = ((int) @this).ToString();

        var codeCollection = configuredCodes
            .Split(new[] {','}, StringSplitOptions.RemoveEmptyEntries)
            .Select(c => c.Trim().ToLowerInvariant()) // allows "100,20X" and "100, 20x"
            .ToList();

        // Entries without a wildcard are compared literally, the rest as patterns.
        return codeCollection.Any(c => c.Contains("x")
            ? Regex.IsMatch(codeNumberAsString, ToRegex(c))
            : c == codeNumberAsString);
    }

    /// <summary>
    /// Turns a configured pattern into an anchored regular expression: every "x"
    /// becomes a single-digit class, everything else is taken literally.
    /// </summary>
    private static string ToRegex(string s)
    {
        // Escape first so that characters such as "." or "(" in the configuration
        // cannot act as regex operators; Regex.Escape leaves "x" untouched.
        return "^" + Regex.Escape(s).Replace("x", "[0-9]") + "$";
    }
}
public static class StringExtensions
{
    /// <summary>
    /// Case-insensitive (invariant culture) prefix test that tolerates null/empty input.
    /// </summary>
    /// <returns>
    /// true when both strings are null or empty; false when only one of them is;
    /// otherwise whether <paramref name="str"/> starts with <paramref name="startsWith"/>.
    /// </returns>
    public static bool StartsWithIgnoreCase(this string str, string startsWith)
    {
        if (string.IsNullOrEmpty(str) && string.IsNullOrEmpty(startsWith))
            return true;

        if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(startsWith))
            return false;

        return str.StartsWith(startsWith, true, CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Parses a boolean; anything bool.TryParse rejects (including null) yields false.
    /// </summary>
    public static bool ToBool(this string str)
    {
        bool parsed;
        bool.TryParse(str, out parsed); // on failure 'parsed' is left as false
        return parsed;
    }

    /// <summary>
    /// Removes one trailing occurrence of <paramref name="suffixToRemove"/>.
    /// </summary>
    /// <returns>
    /// The input without the suffix, or the input unchanged (possibly null) when
    /// either argument is null or the input does not end with the suffix.
    /// </returns>
    public static string TrimEnd(this string input, string suffixToRemove)
    {
        // Ordinal on purpose: the cut below is done by character count, so the
        // match must be character-for-character. A culture-sensitive EndsWith can
        // report a match for ignorable characters (e.g. a soft hyphen) and the
        // Substring would then remove characters that are not the suffix.
        if (input != null && suffixToRemove != null
            && input.EndsWith(suffixToRemove, System.StringComparison.Ordinal))
        {
            return input.Substring(0, input.Length - suffixToRemove.Length);
        }
        return input;
    }
}
/// <summary>
/// Gets all URLs found in the markup and keeps only those the parser accepts,
/// in the form the parser returns them (for example with a scheme or base URL
/// prepended). URLs the parser rejects are dropped.
/// </summary>
/// <param name="markup">HTML to search for links.</param>
/// <param name="parser">Parser that validates and normalises each URL.</param>
/// <param name="checkImages">Passed to GetAllUrlsFromMarkup; when true, img src URLs are collected as well.</param>
/// <returns>List of URLs accepted by the parser; empty when none qualify.</returns>
public static List<string> GetValidUrlListFromMarkup(string markup, IValidUrlParser parser, bool checkImages)
{
    var validUrlList = new List<string>();

    foreach (var url in GetAllUrlsFromMarkup(markup, checkImages))
    {
        string validUrl;
        if (parser.Parse(url, out validUrl))
        {
            validUrlList.Add(validUrl);
        }
    }
    return validUrlList;
}
/// <summary>
/// Output that appends one delimited line per response to the file named by
/// ISettings.CsvFilePath.
/// </summary>
public class CsvOutput : IOutput, IDisposable
{
    private readonly ISettings _settings;

    // Synchronized writer over the CSV file; null once the output has been disposed.
    public TextWriter _writer;

    /// <summary>Opens (or creates) the CSV file described by the settings.</summary>
    public CsvOutput(ISettings settings)
    {
        _settings = settings;
        Setup();
    }

    /// <summary>
    /// Opens the file in Create or Append mode depending on CsvOverwrite and
    /// writes the header line when the file is empty.
    /// </summary>
    private void Setup()
    {
        var fileMode = _settings.CsvOverwrite ? FileMode.Create : FileMode.Append;
        var file = new FileStream(_settings.CsvFilePath, fileMode, FileAccess.Write);

        try
        {
            // Create always truncates, and Append creates the file when it does not
            // exist yet; in both cases the file is empty and still needs its header.
            var needsHeader = file.Length == 0;

            var streamWriter = new StreamWriter(file) {AutoFlush = true};
            _writer = TextWriter.Synchronized(streamWriter);

            if (needsHeader)
            {
                _writer.WriteLine("Code{0}Status{0}Url{0}Referer", _settings.CsvDelimiter);
            }
        }
        catch
        {
            // Do not keep the file handle open when the writer could not be set up.
            _writer = null;
            file.Dispose();
            throw;
        }
    }

    /// <summary>Writes the failed response as a CSV line.</summary>
    public void WriteError(IResponseModel responseModel)
    {
        Write(responseModel);
    }

    /// <summary>Writes the successful response as a CSV line.</summary>
    public void WriteInfo(IResponseModel responseModel)
    {
        Write(responseModel);
    }

    public void WriteInfo(String[] Info)
    {
        // Do nothing - string info is only for console
    }

    /// <summary>Writes "code, status, url, referrer" separated by the configured delimiter.</summary>
    private void Write(IResponseModel responseModel)
    {
        _writer?.WriteLine("{1}{0}{2}{0}{3}{0}{4}",
            _settings.CsvDelimiter,
            responseModel.StatusCodeNumber,
            responseModel.StatusCode,
            responseModel.RequestedUrl,
            responseModel.ReferrerUrl);
    }

    /// <summary>
    /// Closes the file. Safe to call more than once; writes after disposal are ignored.
    /// </summary>
    public void Dispose()
    {
        // Clear the field first so the null-conditional call in Write skips a disposed writer.
        var writer = _writer;
        _writer = null;
        writer?.Dispose();
    }
}
/// <summary>
/// Output that passes error responses to an ISlackClient and ignores all
/// informational output.
/// </summary>
public class SlackOutput : IOutput
{
    private readonly ISlackClient _client;

    /// <param name="slackClient">Client that performs the actual notification.</param>
    public SlackOutput(ISlackClient slackClient)
    {
        _client = slackClient;
    }

    /// <summary>Hands the failed response to the Slack client.</summary>
    public void WriteError(IResponseModel responseModel) => _client.NotifySlack(responseModel);

    /// <summary>Successful responses are not reported to Slack.</summary>
    public void WriteInfo(IResponseModel responseModel)
    {
        // Write nothing to Slack
    }

    /// <summary>Free-text info lines are not reported to Slack.</summary>
    public void WriteInfo(string[] Info)
    {
        // Write nothing to Slack
    }
}
/// <summary>
/// Decides whether a URL found in markup can be requested and, if so, returns
/// it in absolute form.
/// </summary>
public class ValidUrlParser : IValidUrlParser
{
    /// <summary>Pattern a URL has to match before it is considered at all.</summary>
    public Regex Regex { get; set; }

    /// <summary>Prefix put in front of root-relative URLs ("/path").</summary>
    public string BaseUrl { get; set; }

    /// <summary>
    /// Builds the pattern from ISettings.ValidUrlRegex and derives BaseUrl from
    /// ISettings.BaseUrl with its path segments removed.
    /// </summary>
    public ValidUrlParser(ISettings settings)
    {
        Regex = new Regex(settings.ValidUrlRegex);
        BaseUrl = new Uri(settings.BaseUrl).RemoveSegments();
    }

    /// <summary>
    /// Tries to turn <paramref name="url"/> into a requestable URL.
    /// </summary>
    /// <param name="url">URL as found in the markup.</param>
    /// <param name="validUrl">
    /// On success: the URL itself when it is absolute, "http:" + url for "//host/…",
    /// or BaseUrl + url for "/path". On failure: an empty string.
    /// </param>
    /// <returns>true when a usable URL was produced; otherwise false.</returns>
    public bool Parse(string url, out string validUrl)
    {
        validUrl = string.Empty;

        // Reject empty input and anything the configured pattern does not accept.
        if (string.IsNullOrEmpty(url) || !Regex.IsMatch(url))
            return false;

        Uri candidate;
        if (!Uri.TryCreate(url, UriKind.RelativeOrAbsolute, out candidate))
            return false;

        if (candidate.IsAbsoluteUri)
            validUrl = url;
        else if (url.StartsWith("//"))
            validUrl = string.Concat("http:", url);
        else if (url.StartsWith("/"))
            validUrl = string.Concat(BaseUrl, url);
        else
            return false; // other relative forms are not supported

        return true;
    }
}
/// <summary>
/// Read-only view of the crawler configuration.
/// </summary>
public interface ISettings
{
    /// <summary>Base URL; ValidUrlParser derives from it the prefix it puts in front of root-relative links.</summary>
    string BaseUrl { get; }

    /// <summary>Regular expression a URL must match for ValidUrlParser to accept it.</summary>
    string ValidUrlRegex { get; }

    /// <summary>Whether image sources should be checked too — presumably feeds the crawler's CheckImages flag; TODO confirm.</summary>
    bool CheckImages { get; }

    /// <summary>Whether only failed responses should be written to the outputs — presumably feeds the crawler's flag of the same name; TODO confirm.</summary>
    bool OnlyReportBrokenLinksToOutput { get; }

    /// <summary>Slack webhook URL; SlackClient copies it to WebHookUrl.</summary>
    string SlackWebHookUrl { get; }

    /// <summary>Bot name; SlackClient copies it to BotName.</summary>
    string SlackWebHookBotName { get; }

    /// <summary>Bot icon emoji; SlackClient copies it to BotIcon.</summary>
    string SlackWebHookBotIconEmoji { get; }

    /// <summary>Message format string; SlackClient copies it to MessageFormat.</summary>
    string SlackWebHookBotMessageFormat { get; }

    /// <summary>Path of the file CsvOutput writes to.</summary>
    string CsvFilePath { get; }

    /// <summary>When true CsvOutput opens the file with FileMode.Create, otherwise with FileMode.Append.</summary>
    bool CsvOverwrite { get; }

    /// <summary>Delimiter CsvOutput puts between the columns.</summary>
    string CsvDelimiter { get; }

    /// <summary>Whether the given status code counts as a success; ResponseModel uses the result as its IsSuccess value.</summary>
    bool IsSuccess(HttpStatusCode statusCode);

    /// <summary>Whether the crawler emits a summary when the session is finalised.</summary>
    bool PrintSummary { get; }
}
/// <summary>
/// Fixed, in-code ISettings implementation. The only variable part is whether
/// a Slack webhook URL is reported, which is chosen in the constructor.
/// </summary>
public class MockSettings : ISettings {

    // Placeholder only. A webhook URL is a credential (anyone who knows it can
    // post to the channel), so a real one must never be committed to source.
    // NOTE(review): the URL that used to be here looked like a live webhook —
    // confirm it has been revoked in Slack.
    private const string PlaceholderWebHookUrl = @"https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX";

    public string BaseUrl => "https://github.com";

    public bool CheckImages => true;

    public string CsvDelimiter => ";";

    public string CsvFilePath => @"C:\tmp\output.csv";

    public bool CsvOverwrite => true;

    public bool OnlyReportBrokenLinksToOutput => false;

    public string SlackWebHookBotIconEmoji => ":homer:";

    public string SlackWebHookBotMessageFormat => "*Doh! There is a link not working* Url: {0} Statuscode: {1} The link is placed on this page: {2}";

    public string SlackWebHookBotName => "Homer Bot";
    public bool PrintSummary => false;
    private bool IncludeWebHookUrl { get; set; }

    /// <summary>The placeholder webhook URL, or an empty string when the instance was created without one.</summary>
    public string SlackWebHookUrl
    {
        get
        {
            return IncludeWebHookUrl ? PlaceholderWebHookUrl : "";
        }
    }

    public string ValidUrlRegex => @"(^http[s]?:\/{2})|(^www)|(^\/{1,2})";

    /// <summary>Treats every 1xx, 2xx and 3xx status code as a success.</summary>
    public bool IsSuccess(HttpStatusCode statusCode) {
        return statusCode.IsSuccess("1xx,2xx,3xx");
    }

    /// <param name="includeWebHookUrl">true to report a (placeholder) Slack webhook URL, false to report none.</param>
    public MockSettings(bool includeWebHookUrl) {
        this.IncludeWebHookUrl = includeWebHookUrl;
    }
}
CheckImages => 16 | ConfigurationManager.AppSettings[Constants.AppSettings.CheckImages].ToBool(); 17 | 18 | public bool OnlyReportBrokenLinksToOutput => 19 | ConfigurationManager.AppSettings[Constants.AppSettings.OnlyReportBrokenLinksToOutput].ToBool(); 20 | 21 | public string SlackWebHookUrl => 22 | ConfigurationManager.AppSettings[Constants.AppSettings.SlackWebHookUrl]; 23 | 24 | public string SlackWebHookBotName => 25 | ConfigurationManager.AppSettings[Constants.AppSettings.SlackWebHookBotName]; 26 | 27 | public string SlackWebHookBotIconEmoji => 28 | ConfigurationManager.AppSettings[Constants.AppSettings.SlackWebHookBotIconEmoji]; 29 | 30 | public string SlackWebHookBotMessageFormat => 31 | ConfigurationManager.AppSettings[Constants.AppSettings.SlackWebHookBotMessageFormat]; 32 | 33 | public string CsvFilePath => 34 | ConfigurationManager.AppSettings[Constants.AppSettings.CsvFilePath]; 35 | 36 | public bool CsvOverwrite => 37 | ConfigurationManager.AppSettings[Constants.AppSettings.CsvOverwrite].ToBool(); 38 | 39 | public string CsvDelimiter => 40 | ConfigurationManager.AppSettings[Constants.AppSettings.CsvDelimiter]; 41 | 42 | public bool PrintSummary => 43 | ConfigurationManager.AppSettings[Constants.AppSettings.PrintSummary].ToBool(); 44 | 45 | public bool IsSuccess(HttpStatusCode statusCode) 46 | { 47 | var configuredCodes = ConfigurationManager.AppSettings[Constants.AppSettings.SuccessHttpStatusCodes] ?? 
/// <summary>
/// StructureMap registry: scans the calling assembly with the default
/// conventions and additionally registers every output provider listed in the
/// "outputProviders" configuration section as a named IOutput.
/// </summary>
public class StructureMapRegistry : Registry
{
    /// <exception cref="ConfigurationErrorsException">
    /// A configured provider type cannot be loaded or does not implement IOutput.
    /// </exception>
    public StructureMapRegistry()
    {
        Scan(scan =>
        {
            scan.TheCallingAssembly();
            scan.WithDefaultConventions();
        });

        // The section is read as a Hashtable, whose enumeration yields DictionaryEntry
        // items: key = provider name, value = type name of the provider.
        var providers = (ConfigurationManager.GetSection(Constants.AppSettings.OutputProviders) as Hashtable)?
            .Cast<DictionaryEntry>()
            .ToDictionary(d => d.Key.ToString(), d => d.Value.ToString());

        // No (or a differently typed) section: nothing extra to register.
        if (providers == null)
            return;

        var pluginType = typeof(IOutput);

        foreach (var provider in providers)
        {
            var concreteType = Type.GetType(provider.Value);

            if (concreteType == null)
            {
                throw new ConfigurationErrorsException(string.Format(
                    "Output provider '{0}' not found: {1}",
                    provider.Key,
                    provider.Value
                ));
            }

            if (!concreteType.GetInterfaces().Contains(pluginType))
            {
                throw new ConfigurationErrorsException(string.Format(
                    "Output provider '{0}' does not implement IOutput: {1}",
                    provider.Key,
                    provider.Value
                ));
            }

            For(pluginType).Add(concreteType).Named(provider.Key);
        }
    }
}
-------------------------------------------------------------------------------- /LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler.Test/LinkCrawler.Test.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net7.0 5 | enable 6 | enable 7 | 8 | false 9 | 10 | 11 | 12 | 13 | 14 | 15 | runtime; build; native; contentfiles; analyzers; buildtransitive 16 | all 17 | 18 | 19 | runtime; build; native; contentfiles; analyzers; buildtransitive 20 | all 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler.Test/UnitTest1.cs: -------------------------------------------------------------------------------- 1 | namespace LinkCrawler.Test 2 | { 3 | public class UnitTest1 4 | { 5 | [Fact] 6 | public void Test1() 7 | { 8 | /* Placeholder test: the body is empty, so it makes no assertions. */ 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler.Test/Usings.cs: -------------------------------------------------------------------------------- 1 | global using Xunit; -------------------------------------------------------------------------------- /LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.4.33103.184 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkCrawler", "LinkCrawler\LinkCrawler.csproj", "{DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}.Debug|Any CPU.ActiveCfg = 
Debug|Any CPU 15 | {DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {0815F252-9C5A-42C9-A1CC-743850B55836} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler/LinkCrawler.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net7.0 6 | enable 7 | enable 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler/Program.cs: -------------------------------------------------------------------------------- 1 | // See https://aka.ms/new-console-template for more information 2 | Console.WriteLine("Hello, World!"); 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LinkCrawler 2 | Simple C# console application that will crawl the given webpage for broken image-tags and hyperlinks. The result of this will be written to output. Right now we have these outputs: console, csv, slack. 3 | 4 | 5 | ## Why? 6 | Because it could be useful to know when a webpage you have responsibility for displays broken links to its users. I have this running continuously, but you don't have to. For instance, after upgrading your CMS, changing the database schema, migrating content, etc., it can be relevant to know if this did or did not introduce broken links. 
Just run this tool one time and you will know exactly how many links are broken, where they link to, and where they are located. 7 | 8 | ## Build 9 | Clone repo :point_right: open solution in Visual Studio :point_right: build :facepunch: 10 | 11 | AppVeyor is used as CI, so when code is pushed to this repo the solution will get built and all tests will be run. 12 | 13 | | Branch | Build status | 14 | | :----- | :---------------------------------------| 15 | | develop | [![Build status](https://ci.appveyor.com/api/projects/status/syw3l7xeicy7xc0b/branch/develop?svg=true)](https://ci.appveyor.com/project/hmol/linkcrawler/branch/develop) | 16 | | master | [![Build status](https://ci.appveyor.com/api/projects/status/syw3l7xeicy7xc0b/branch/master?svg=true)](https://ci.appveyor.com/project/hmol/linkcrawler/branch/master) | 17 | 18 | ## AppSettings 19 | 20 | | Key | Usage | 21 | | :-------------------------- | :---------------------------------------| 22 | | ```BaseUrl ``` | Base url for site to crawl | 23 | | ```SuccessHttpStatusCodes``` | HTTP status codes that are considered "successful". Example: "1xx,2xx,302,303" | 24 | | ```CheckImages``` | If true, ``` that controls what output should be used. 37 | 38 | ## Output to file 39 | ```LinkCrawler.exe >> crawl.log``` will save output to file. 40 | ![Slack](http://henrikm.com/content/images/2016/Feb/as-file.png "Output to file") 41 | 42 | ## Output to slack 43 | If configured correctly, the defined slack-webhook will be notified about broken links. 44 | ![Slack](http://henrikm.com/content/images/2016/Feb/blurred1.jpg "Slack") 45 | 46 | ## How I use it 47 | I have it running as a WebJob in Azure, scheduled every 4 days. It will notify the slack-channel where the editors of the website dwell. 48 | 49 | Creating a webjob is simple. Just put your compiled project files (/bin/) inside a .zip, and upload it. 50 | ![Slack](http://henrikm.com/content/images/2016/Feb/azure-webjob-setup-1.PNG "WebJob") 51 | 52 | Schedule it. 
53 | 54 | ![Slack](http://henrikm.com/content/images/2016/Feb/azure-scheduele.PNG) 55 | 56 | The output of a webjob is available because Azure saves it in log files. 57 | ![Slack](http://henrikm.com/content/images/2016/Feb/azure-log.PNG) 58 | 59 | 60 | Read more about Azure Webjobs: https://azure.microsoft.com/en-us/documentation/articles/web-sites-create-web-jobs/ 61 | 62 | Read more about Slack incoming webhooks: https://api.slack.com/incoming-webhooks 63 | --------------------------------------------------------------------------------