├── .gitattributes
├── .gitignore
├── LICENSE.md
├── LinkCrawler
├── LinkCrawler.Tests
│ ├── LinkCrawler.Tests.csproj
│ ├── LinkCrawlerTests.cs
│ ├── Properties
│ │ └── AssemblyInfo.cs
│ ├── UtilsTests
│ │ ├── ClientsTests
│ │ │ └── SlackClientTests.cs
│ │ ├── ExtensionsTests
│ │ │ ├── HttpStatusCodeExtensionsTests.cs
│ │ │ ├── RegexExtensionsTest.cs
│ │ │ └── StringExtensionsTests.cs
│ │ ├── OutputsTests
│ │ │ └── SlackOutputTests.cs
│ │ └── ParsersTests
│ │ │ └── ValidUrlParserTests.cs
│ └── packages.config
├── LinkCrawler.sln
└── LinkCrawler
│ ├── App.config
│ ├── LinkCrawler.cs
│ ├── LinkCrawler.csproj
│ ├── Models
│ ├── IResponseModel.cs
│ ├── LinkModel.cs
│ ├── RequestModel.cs
│ └── ResponseModel.cs
│ ├── Program.cs
│ ├── Properties
│ └── AssemblyInfo.cs
│ ├── Utils
│ ├── Clients
│ │ ├── ISlackClient.cs
│ │ └── SlackClient.cs
│ ├── Extensions
│ │ ├── HttpStatusCodeExtensions.cs
│ │ ├── RegexExtensions.cs
│ │ ├── RestRequestExtensions.cs
│ │ ├── RestResponseExtensions.cs
│ │ ├── StringExtensions.cs
│ │ └── UriExtensions.cs
│ ├── Helpers
│ │ ├── ConsoleHelper.cs
│ │ └── MarkupHelpers.cs
│ ├── Outputs
│ │ ├── ConsoleOutput.cs
│ │ ├── CsvOutput.cs
│ │ ├── IOutput.cs
│ │ └── SlackOutput.cs
│ ├── Parsers
│ │ ├── IValidUrlParser.cs
│ │ └── ValidUrlParser.cs
│ ├── Settings
│ │ ├── Constants.cs
│ │ ├── ISettings.cs
│ │ ├── MockSettings.cs
│ │ └── Settings.cs
│ └── StructureMapRegistry.cs
│ └── packages.config
├── LinkCrawler_DotNet_7
└── LinkCrawler
│ ├── LinkCrawler.Test
│ ├── LinkCrawler.Test.csproj
│ ├── UnitTest1.cs
│ └── Usings.cs
│ ├── LinkCrawler.sln
│ └── LinkCrawler
│ ├── LinkCrawler.csproj
│ └── Program.cs
└── README.md
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 |
4 | # User-specific files
5 | *.suo
6 | *.user
7 | *.userosscache
8 | *.sln.docstates
9 |
10 | # User-specific files (MonoDevelop/Xamarin Studio)
11 | *.userprefs
12 |
13 | # Build results
14 | [Dd]ebug/
15 | [Dd]ebugPublic/
16 | [Rr]elease/
17 | [Rr]eleases/
18 | x64/
19 | x86/
20 | bld/
21 | [Bb]in/
22 | [Oo]bj/
23 |
24 | # Visual Studio 2015 cache/options directory
25 | .vs/
26 | # Uncomment if you have tasks that create the project's static files in wwwroot
27 | #wwwroot/
28 |
29 | # MSTest test Results
30 | [Tt]est[Rr]esult*/
31 | [Bb]uild[Ll]og.*
32 |
33 | # NUNIT
34 | *.VisualState.xml
35 | TestResult.xml
36 |
37 | # Build Results of an ATL Project
38 | [Dd]ebugPS/
39 | [Rr]eleasePS/
40 | dlldata.c
41 |
42 | # DNX
43 | project.lock.json
44 | artifacts/
45 |
46 | *_i.c
47 | *_p.c
48 | *_i.h
49 | *.ilk
50 | *.meta
51 | *.obj
52 | *.pch
53 | *.pdb
54 | *.pgc
55 | *.pgd
56 | *.rsp
57 | *.sbr
58 | *.tlb
59 | *.tli
60 | *.tlh
61 | *.tmp
62 | *.tmp_proj
63 | *.log
64 | *.vspscc
65 | *.vssscc
66 | .builds
67 | *.pidb
68 | *.svclog
69 | *.scc
70 |
71 | # Chutzpah Test files
72 | _Chutzpah*
73 |
74 | # Visual C++ cache files
75 | ipch/
76 | *.aps
77 | *.ncb
78 | *.opendb
79 | *.opensdf
80 | *.sdf
81 | *.cachefile
82 |
83 | # Visual Studio profiler
84 | *.psess
85 | *.vsp
86 | *.vspx
87 | *.sap
88 |
89 | # TFS 2012 Local Workspace
90 | $tf/
91 |
92 | # Guidance Automation Toolkit
93 | *.gpState
94 |
95 | # ReSharper is a .NET coding add-in
96 | _ReSharper*/
97 | *.[Rr]e[Ss]harper
98 | *.DotSettings.user
99 |
100 | # JustCode is a .NET coding add-in
101 | .JustCode
102 |
103 | # TeamCity is a build add-in
104 | _TeamCity*
105 |
106 | # DotCover is a Code Coverage Tool
107 | *.dotCover
108 |
109 | # NCrunch
110 | _NCrunch_*
111 | .*crunch*.local.xml
112 | nCrunchTemp_*
113 |
114 | # MightyMoose
115 | *.mm.*
116 | AutoTest.Net/
117 |
118 | # Web workbench (sass)
119 | .sass-cache/
120 |
121 | # Installshield output folder
122 | [Ee]xpress/
123 |
124 | # DocProject is a documentation generator add-in
125 | DocProject/buildhelp/
126 | DocProject/Help/*.HxT
127 | DocProject/Help/*.HxC
128 | DocProject/Help/*.hhc
129 | DocProject/Help/*.hhk
130 | DocProject/Help/*.hhp
131 | DocProject/Help/Html2
132 | DocProject/Help/html
133 |
134 | # Click-Once directory
135 | publish/
136 |
137 | # Publish Web Output
138 | *.[Pp]ublish.xml
139 | *.azurePubxml
140 | # TODO: Comment the next line if you want to checkin your web deploy settings
141 | # but database connection strings (with potential passwords) will be unencrypted
142 | *.pubxml
143 | *.publishproj
144 |
145 | # NuGet Packages
146 | *.nupkg
147 | # The packages folder can be ignored because of Package Restore
148 | **/packages/*
149 | # except build/, which is used as an MSBuild target.
150 | !**/packages/build/
151 | # Uncomment if necessary however generally it will be regenerated when needed
152 | #!**/packages/repositories.config
153 | # NuGet v3's project.json files produce more ignorable files
154 | *.nuget.props
155 | *.nuget.targets
156 |
157 | # Microsoft Azure Build Output
158 | csx/
159 | *.build.csdef
160 |
161 | # Microsoft Azure Emulator
162 | ecf/
163 | rcf/
164 |
165 | # Microsoft Azure ApplicationInsights config file
166 | ApplicationInsights.config
167 |
168 | # Windows Store app package directory
169 | AppPackages/
170 | BundleArtifacts/
171 |
172 | # Visual Studio cache files
173 | # files ending in .cache can be ignored
174 | *.[Cc]ache
175 | # but keep track of directories ending in .cache
176 | !*.[Cc]ache/
177 |
178 | # Others
179 | ClientBin/
180 | ~$*
181 | *~
182 | *.dbmdl
183 | *.dbproj.schemaview
184 | *.pfx
185 | *.publishsettings
186 | node_modules/
187 | orleans.codegen.cs
188 |
189 | # RIA/Silverlight projects
190 | Generated_Code/
191 |
192 | # Backup & report files from converting an old project file
193 | # to a newer Visual Studio version. Backup files are not needed,
194 | # because we have git ;-)
195 | _UpgradeReport_Files/
196 | Backup*/
197 | UpgradeLog*.XML
198 | UpgradeLog*.htm
199 |
200 | # SQL Server files
201 | *.mdf
202 | *.ldf
203 |
204 | # Business Intelligence projects
205 | *.rdl.data
206 | *.bim.layout
207 | *.bim_*.settings
208 |
209 | # Microsoft Fakes
210 | FakesAssemblies/
211 |
212 | # GhostDoc plugin setting file
213 | *.GhostDoc.xml
214 |
215 | # Node.js Tools for Visual Studio
216 | .ntvs_analysis.dat
217 |
218 | # Visual Studio 6 build log
219 | *.plg
220 |
221 | # Visual Studio 6 workspace options file
222 | *.opt
223 |
224 | # Visual Studio LightSwitch build output
225 | **/*.HTMLClient/GeneratedArtifacts
226 | **/*.DesktopClient/GeneratedArtifacts
227 | **/*.DesktopClient/ModelManifest.xml
228 | **/*.Server/GeneratedArtifacts
229 | **/*.Server/ModelManifest.xml
230 | _Pvt_Extensions
231 |
232 | # Paket dependency manager
233 | .paket/paket.exe
234 |
235 | # FAKE - F# Make
236 | .fake/
237 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 henrik molnes
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/LinkCrawler.Tests.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | AnyCPU
7 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}
8 | Library
9 | Properties
10 | LinkCrawler.Tests
11 | LinkCrawler.Tests
12 | v4.5.2
13 | 512
14 |
15 |
16 | true
17 | full
18 | false
19 | bin\Debug\
20 | DEBUG;TRACE
21 | prompt
22 | 4
23 |
24 |
25 | pdbonly
26 | true
27 | bin\Release\
28 | TRACE
29 | prompt
30 | 4
31 |
32 |
33 |
34 | ..\packages\AutoFixture.4.5.0\lib\net452\AutoFixture.dll
35 |
36 |
37 | ..\packages\AutoFixture.AutoMoq.4.5.0\lib\net452\AutoFixture.AutoMoq.dll
38 |
39 |
40 | ..\packages\Fare.2.1.1\lib\net35\Fare.dll
41 |
42 |
43 | ..\packages\Moq.4.2.1510.2205\lib\net40\Moq.dll
44 | True
45 |
46 |
47 | ..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll
48 | True
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 | App.config
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 | {db53303b-f9fb-4d77-b656-d05db0420e6a}
81 | LinkCrawler
82 |
83 |
84 |
85 |
86 |
87 |
88 |
95 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/LinkCrawlerTests.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Models;
2 | using LinkCrawler.Utils.Clients;
3 | using LinkCrawler.Utils.Outputs;
4 | using LinkCrawler.Utils.Parsers;
5 | using LinkCrawler.Utils.Settings;
6 | using Moq;
7 | using NUnit.Framework;
8 | using System.Linq;
9 |
10 | namespace LinkCrawler.Tests
11 | {
12 | // Tests how the LinkCrawler class interacts with the classes it depends on
13 | [TestFixture]
14 | public class LinkCrawlerTests
15 | {
16 |     public LinkCrawler LinkCrawler { get; set; }
17 |     public Mock<ISlackClient> MockSlackClient { get; set; }
18 |     public Settings Settings { get; set; }
19 |     // Every test gets a fresh crawler whose only output is a SlackOutput backed by a mocked client.
20 |     [SetUp]
21 |     public void SetUp()
22 |     {
23 |         MockSlackClient = new Mock<ISlackClient>();
24 |         Settings = new Settings();
25 |         var parser = new ValidUrlParser(Settings);
26 |         var outputs = new IOutput[]
27 |         {
28 |             new SlackOutput(MockSlackClient.Object),
29 |         };
30 |
31 |         LinkCrawler = new LinkCrawler(outputs, parser, Settings);
32 |     }
33 |
34 |     [Test]
35 |     public void WriteOutputAndNotifySlack_SucessResponse_NotifySlack()
36 |     {
37 |         var mockResponseModel = new Mock<IResponseModel>();
38 |         mockResponseModel.Setup(x => x.IsSuccess).Returns(false);
39 |         // NOTE: despite "SucessResponse" in the name, the response is a failure; WriteOutput only reports errors for those.
40 |         LinkCrawler.WriteOutput(mockResponseModel.Object);
41 |         MockSlackClient.Verify(m => m.NotifySlack(mockResponseModel.Object));
42 |     }
43 |
44 |     [Test]
45 |     public void CrawlForLinksInResponse_ResponseModelWithMarkup_ValidUrlFoundInMarkup()
46 |     {
47 |         var url = "http://www.github.com";
48 |         var markup = string.Format("this is html document <a href='{0}'>a valid link</a>", url);
49 |         var mockResponseModel = new Mock<IResponseModel>();
50 |         mockResponseModel.Setup(x => x.Markup).Returns(markup);
51 |         // The crawler must register the anchor's target in its URL list.
52 |         LinkCrawler.CrawlForLinksInResponse(mockResponseModel.Object);
53 |         Assert.That(LinkCrawler.UrlList.Any(l => l.Address == url));
54 |     }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("LinkCrawler.Tests")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("")]
12 | [assembly: AssemblyProduct("LinkCrawler.Tests")]
13 | [assembly: AssemblyCopyright("Copyright © 2016")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("964a55be-ac53-4e97-baa9-55323323e3e3")]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ClientsTests/SlackClientTests.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Clients;
2 | using NUnit.Framework;
3 | using LinkCrawler.Utils.Settings;
4 |
5 | namespace LinkCrawler.Tests.UtilsTests.ClientsTests {
6 |
7 | [TestFixture]
8 | public class SlackClientTests {
9 |
10 |     // Naming convention: MethodName_StateUnderTest_ExpectedBehaviour
11 |     [Test]
12 |     public void SlackClient_InstantiationWithWebHookUrl_InstantiatedCorrectlyWithWebHookUrl() {
13 |         // MockSettings(true): the client is expected to end up with a webhook URL (asserted below).
14 |         var client = new SlackClient(new MockSettings(true));
15 |
16 |         Assert.That(client.WebHookUrl, Is.EqualTo(@"https://hooks.slack.com/services/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"));
17 |         Assert.That(client.BotName, Is.EqualTo("Homer Bot"));
18 |         Assert.That(client.BotIcon, Is.EqualTo(":homer:"));
19 |         Assert.That(client.MessageFormat, Is.EqualTo("*Doh! There is a link not working* Url: {0} Statuscode: {1} The link is placed on this page: {2}"));
20 |         Assert.That(client.HasWebHookUrl, Is.True);
21 |
22 |     }
23 |
24 |     [Test]
25 |     public void SlackClient_InstantiationWithoutWebHookUrl_InstantiatedCorrectlyWithoutWebHookUrl() {
26 |         // MockSettings(false): the client is expected to end up with an empty webhook URL (asserted below).
27 |         var client = new SlackClient(new MockSettings(false));
28 |
29 |         Assert.That(client.WebHookUrl, Is.EqualTo(""));
30 |         Assert.That(client.BotName, Is.EqualTo("Homer Bot"));
31 |         Assert.That(client.BotIcon, Is.EqualTo(":homer:"));
32 |         Assert.That(client.MessageFormat, Is.EqualTo("*Doh! There is a link not working* Url: {0} Statuscode: {1} The link is placed on this page: {2}"));
33 |         Assert.That(client.HasWebHookUrl, Is.False);
34 |
35 |     }
36 | }
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/HttpStatusCodeExtensionsTests.cs:
--------------------------------------------------------------------------------
1 | using System.Net;
2 | using LinkCrawler.Utils.Extensions;
3 | using NUnit.Framework;
4 |
5 | namespace LinkCrawler.Tests.UtilsTests.ExtensionsTests
6 | {
7 | [TestFixture]
8 | public class HttpStatusCodeExtensionsTests
9 | {
10 |     [TestCase(HttpStatusCode.OK, "200", true, Description = "Simple case of exact match")]
11 |     [TestCase(HttpStatusCode.OK, "404", false, Description = "Simple case of no exact match")]
12 |     [TestCase(HttpStatusCode.OK, "200,404", true, Description = "More complex: two codes in config")]
13 |     [TestCase(HttpStatusCode.OK, "200, 404", true, Description = "Space after comma is okay")]
14 |     [TestCase(HttpStatusCode.OK, "2xx", true, Description = "Use wildcards in code number")]
15 |     [TestCase(HttpStatusCode.OK, "2Xx", true, Description = "X wildcard is not case sensitive")]
16 |     [TestCase(HttpStatusCode.NotFound, "xX4", true, Description = "Wildcard can be used for any digit")]
17 |     [TestCase(HttpStatusCode.OK, "2xx,xX0", true, Description = "Multiple wildcard codes allowed")]
18 |     public void Will_match_a_single_code_exactly(HttpStatusCode givenCode, string givenConfig, bool expectedOutcome)
19 |     {
20 |         // Each case pairs a status code with a configured success list (exact codes and/or x-wildcards).
21 |         var actualOutcome = givenCode.IsSuccess(givenConfig);
22 |         Assert.That(actualOutcome, Is.EqualTo(expectedOutcome));
23 |     }
24 | }
25 | }
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/RegexExtensionsTest.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Extensions;
2 | using NUnit.Framework;
3 | using System;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 | using System.Text;
7 | using System.Threading.Tasks;
8 |
9 | namespace LinkCrawler.Tests.UtilsTests.ExtensionsTests
10 | {
11 | [TestFixture]
12 | public class RegexExtensionsTest
13 | {
14 | public void IsNotMatch_Should_Return_False()
15 | {
16 | // NOTE(review): no [Test] attribute, so NUnit does not run this method. The pattern only matches input starting with "http(s)://", "www" or one/two slashes, which the url below does not - confirm the expected value before enabling.
17 | string regex = "(^http[s]?:\\/{2})|(^www)|(^\\/{1,2})";
18 | string url = "website.com:///podcast/";
19 | bool expression = RegexExtensions.IsNotMatch(new System.Text.RegularExpressions.Regex(regex), url);
20 | Assert.IsFalse(expression);
21 | }
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/StringExtensionsTests.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Extensions;
2 | using NUnit.Framework;
3 |
4 | namespace LinkCrawler.Tests.UtilsTests.ExtensionsTests
5 | {
6 | [TestFixture]
7 | public class StringExtensionsTests
8 | {
9 |     // Naming convention: MethodName_StateUnderTest_ExpectedBehaviour
10 |     [Test]
11 |     public void ToBool_StringValueIsTrue_BooleanValueIsTrue()
12 |     {
13 |         var text = "true";
14 |         var parsed = text.ToBool();
15 |         Assert.IsTrue(parsed);
16 |     }
17 |
18 |     [Test]
19 |     public void ToBool_StringValueIsFalse_BooleanValueIsFalse()
20 |     {
21 |         var text = "false";
22 |         var parsed = text.ToBool();
23 |         Assert.IsFalse(parsed);
24 |     }
25 |
26 |     [Test]
27 |     public void ToBool_StringValueIsFoobar_BooleanValueIsFalse()
28 |     {
29 |         var text = "Foobar";
30 |         var parsed = text.ToBool();
31 |         Assert.IsFalse(parsed);
32 |     }
33 |
34 |     [Test]
35 |     public void ToBool_StringValueIsEmpty_BooleanValueIsFalse()
36 |     {
37 |         var text = "";
38 |         var parsed = text.ToBool();
39 |         Assert.IsFalse(parsed);
40 |     }
41 |
42 |     [Test]
43 |     public void ToBool_StringValueIsNull_BooleanValueIsFalse()
44 |     {
45 |         string text = null;
46 |         var parsed = text.ToBool();
47 |         Assert.IsFalse(parsed);
48 |     }
49 |
50 |     [Test]
51 |     public void StartsWithIgnoreCase_SameLetterAndSameCase_True()
52 |     {
53 |         var subject = "Foobar";
54 |         var prefix = "F";
55 |         var startsWith = subject.StartsWithIgnoreCase(prefix);
56 |         Assert.IsTrue(startsWith);
57 |     }
58 |
59 |     [Test]
60 |     public void StartsWithIgnoreCase_SameLetterAndDifferentCase_True()
61 |     {
62 |         var subject = "Foobar";
63 |         var prefix = "f";
64 |         var startsWith = subject.StartsWithIgnoreCase(prefix);
65 |         Assert.IsTrue(startsWith);
66 |     }
67 |
68 |     [Test]
69 |     public void StartsWithIgnoreCase_EmptyWord_False()
70 |     {
71 |         var subject = "";
72 |         var prefix = "A";
73 |         var startsWith = subject.StartsWithIgnoreCase(prefix);
74 |         Assert.IsFalse(startsWith);
75 |     }
76 |
77 |     [Test]
78 |     public void StartsWithIgnoreCase_LetterIsBlankSpace_False()
79 |     {
80 |         var subject = "Foobar";
81 |         var prefix = " ";
82 |         var startsWith = subject.StartsWithIgnoreCase(prefix);
83 |         Assert.IsFalse(startsWith);
84 |     }
85 |
86 |     [Test]
87 |     public void StartsWithIgnoreCase_LetterIsNull_False()
88 |     {
89 |         var subject = "Foobar";
90 |         string prefix = null;
91 |         var startsWith = subject.StartsWithIgnoreCase(prefix);
92 |         Assert.IsFalse(startsWith);
93 |     }
94 |
95 |     [Test]
96 |     public void StartsWithIgnoreCase_WordIsNull_False()
97 |     {
98 |         string subject = null;
99 |         var prefix = "F";
100 |         var startsWith = subject.StartsWithIgnoreCase(prefix);
101 |         Assert.IsFalse(startsWith);
102 |     }
103 |
104 |     [Test]
105 |     public void StartsWithIgnoreCase_DifferentLetterAndDifferentCase_True()
106 |     {
107 |         var subject = "Foobar";
108 |         var prefix = "a";
109 |         var startsWith = subject.StartsWithIgnoreCase(prefix);
110 |         Assert.IsFalse(startsWith); // NOTE: expected value is false although the test name ends in "_True"
111 |     }
112 |
113 |     [Test]
114 |     public void StartsWithIgnoreCase_DifferentLetterAndSameCase_True()
115 |     {
116 |         var subject = "Foobar";
117 |         var prefix = "A";
118 |         var startsWith = subject.StartsWithIgnoreCase(prefix);
119 |         Assert.IsFalse(startsWith); // NOTE: expected value is false although the test name ends in "_True"
120 |     }
121 |
122 |     [Test]
123 |     public void TrimEnd_InputNull_Null()
124 |     {
125 |         string source = null;
126 |
127 |         var trimmed = source.TrimEnd("");
128 |
129 |         // A null input is expected to come back as null.
130 |         Assert.IsNull(trimmed);
131 |     }
132 |
133 |     [Test]
134 |     public void TrimEnd_InputEndsWithSuffix_RemovesSuffix()
135 |     {
136 |         var source = "friend";
137 |
138 |         var trimmed = source.TrimEnd("end");
139 |
140 |         // The matching suffix is expected to be cut off.
141 |         Assert.That(trimmed, Is.EqualTo("fri"));
142 |     }
143 |
144 |     [Test]
145 |     public void TrimEnd_InputEndsWithSuffixDifferentCase_ReturnsOriginal()
146 |     {
147 |         var source = "friEND";
148 |
149 |         var trimmed = source.TrimEnd("end");
150 |
151 |         // The suffix differs only in case, and the input is expected back unchanged.
152 |         Assert.That(trimmed, Is.EqualTo("friEND"));
153 |     }
154 |
155 |     [Test]
156 |     public void TrimEnd_InputEndsWithSuffixDifferentCase_ReturnsEmptyString()
157 |     {
158 |         var source = "friend";
159 |
160 |         var trimmed = source.TrimEnd("friend");
161 |
162 |         // NOTE: despite "DifferentCase" in the name, the suffix equals the whole input in the same case.
163 |         Assert.That(trimmed, Is.EqualTo(string.Empty));
164 |     }
165 | }
166 | }
167 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/OutputsTests/SlackOutputTests.cs:
--------------------------------------------------------------------------------
1 | using AutoFixture;
2 | using NUnit.Framework;
3 | using LinkCrawler.Utils.Outputs;
4 | using AutoFixture.AutoMoq;
5 | using LinkCrawler.Utils.Clients;
6 | using LinkCrawler.Models;
7 | using Moq;
8 |
9 | namespace LinkCrawler.Tests.UtilsTests.OutputsTests
10 | {
11 | [TestFixture]
12 | public class SlackOutputTests
13 | {
14 |     private Fixture _fixture;
15 |
16 |     [SetUp]
17 |     public void SetUup()
18 |     {
19 |         _fixture = new Fixture();
20 |         _fixture.Customize(new AutoMoqCustomization());
21 |     }
22 |
23 |     [Test]
24 |     public void WriteError()
25 |     {
26 |         // Freeze the client mock first so the SlackOutput created next receives that same instance.
27 |         var slackClient = _fixture.Freeze<Mock<ISlackClient>>();
28 |         var slackOutput = _fixture.Freeze<SlackOutput>();
29 |         var responseModel = _fixture.Create<IResponseModel>();
30 |
31 |         slackClient.Setup(x => x.NotifySlack(responseModel));
32 |
33 |         slackOutput.WriteError(responseModel);
34 |
35 |         slackClient.VerifyAll();
36 |         // VerifyAll fails unless the NotifySlack setup above was actually invoked by WriteError.
37 |     }
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/UtilsTests/ParsersTests/ValidUrlParserTests.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Parsers;
2 | using LinkCrawler.Utils.Settings;
3 | using NUnit.Framework;
4 |
5 | namespace LinkCrawler.Tests.UtilsTests.ParsersTests
6 | {
7 | [TestFixture]
8 | public class ValidUrlParserTests
9 | {
10 |     public ValidUrlParser ValidUrlParser { get; set; }
11 |     [SetUp]
12 |     public void SetUp()
13 |     {
14 |         ValidUrlParser = new ValidUrlParser(new Settings());
15 |     }
16 |
17 |     [Test]
18 |     public void Parse_CompleteValidUrl_True()
19 |     {
20 |         var completeUrl = "http://www.github.com";
21 |         string parsedUrl;
22 |         // A complete URL is expected to be accepted and returned unchanged.
23 |         Assert.IsTrue(ValidUrlParser.Parse(completeUrl, out parsedUrl));
24 |         Assert.AreEqual(completeUrl, parsedUrl);
25 |     }
26 |
27 |     [Test]
28 |     public void Parse_UrlNoScheme_True()
29 |     {
30 |         var schemelessUrl = "//www.github.com";
31 |         string parsedUrl;
32 |         var expectedUrl = "http:" + schemelessUrl;
33 |         // A scheme-relative URL is expected to come back prefixed with "http:".
34 |         Assert.IsTrue(ValidUrlParser.Parse(schemelessUrl, out parsedUrl));
35 |         Assert.AreEqual(expectedUrl, parsedUrl);
36 |     }
37 |
38 |     [Test]
39 |     public void Parse_UrlOnlyRelativePath_True()
40 |     {
41 |         var relativeUrl = "/relative/path";
42 |         string parsedUrl;
43 |         Assert.IsTrue(ValidUrlParser.Parse(relativeUrl, out parsedUrl));
44 |
45 |         // A relative path is expected to come back appended to the parser's BaseUrl.
46 |         var expectedUrl = string.Format("{0}{1}",ValidUrlParser.BaseUrl, relativeUrl);
47 |         Assert.AreEqual(expectedUrl, parsedUrl);
48 |     }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.Tests/packages.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 14
4 | VisualStudioVersion = 14.0.24720.0
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkCrawler", "LinkCrawler\LinkCrawler.csproj", "{DB53303B-F9FB-4D77-B656-D05DB0420E6A}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkCrawler.Tests", "LinkCrawler.Tests\LinkCrawler.Tests.csproj", "{964A55BE-AC53-4E97-BAA9-55323323E3E3}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Any CPU = Debug|Any CPU
13 | Release|Any CPU = Release|Any CPU
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}.Release|Any CPU.Build.0 = Release|Any CPU
20 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {964A55BE-AC53-4E97-BAA9-55323323E3E3}.Release|Any CPU.Build.0 = Release|Any CPU
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | EndGlobal
29 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/App.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/LinkCrawler.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Models;
2 | using LinkCrawler.Utils.Extensions;
3 | using LinkCrawler.Utils.Helpers;
4 | using LinkCrawler.Utils.Outputs;
5 | using LinkCrawler.Utils.Parsers;
6 | using LinkCrawler.Utils.Settings;
7 | using RestSharp;
8 | using System;
9 | using System.Collections.Generic;
10 | using System.Diagnostics;
11 | using System.Linq;
12 |
13 | namespace LinkCrawler
14 | {
15 | public class LinkCrawler
16 | {
17 | public string BaseUrl { get; set; }
18 | public bool CheckImages { get; set; }
19 | public RestRequest RestRequest { get; set; } // single request object passed to every ExecuteAsync call
20 | public IEnumerable<IOutput> Outputs { get; set; }
21 | public IValidUrlParser ValidUrlParser { get; set; }
22 | public bool OnlyReportBrokenLinksToOutput { get; set; }
23 | public static List<LinkModel> UrlList; // static: shared by all instances and replaced by each constructor call
24 | private ISettings _settings;
25 | private Stopwatch _timer;
26 | // Copies the crawl options from settings and starts with an empty link list and a fresh Stopwatch.
27 | public LinkCrawler(IEnumerable<IOutput> outputs, IValidUrlParser validUrlParser, ISettings settings)
28 | {
29 |     BaseUrl = settings.BaseUrl;
30 |     Outputs = outputs;
31 |     ValidUrlParser = validUrlParser;
32 |     CheckImages = settings.CheckImages;
33 |     UrlList = new List<LinkModel>();
34 |     RestRequest = new RestRequest(Method.GET).SetHeader("Accept", "*/*");
35 |     OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;
36 |     _settings = settings;
37 |     _timer = new Stopwatch();
38 | }
39 |
40 | public void Start()
41 | {
42 | _timer.Start(); // begin timing the session; the Stopwatch is stopped again in FinaliseSession
43 | UrlList.Add(new LinkModel(BaseUrl)); // register the base URL as the first known link
44 | SendRequest(BaseUrl); // no referrer is passed for the starting page
45 | }
46 |
47 | public void SendRequest(string crawlUrl, string referrerUrl = "")
48 | {
49 |     var request = new RequestModel(crawlUrl, referrerUrl, BaseUrl);
50 |     var client = new RestClient(new Uri(crawlUrl));
51 |     client.FollowRedirects = false;
52 |     // Fire the shared request asynchronously; a null response is dropped without processing.
53 |     client.ExecuteAsync(RestRequest, response =>
54 |     {
55 |         if (response != null)
56 |         {
57 |             ProcessResponse(new ResponseModel(response, request, _settings));
58 |         }
59 |     });
60 | }
61 |
62 | public void ProcessResponse(IResponseModel responseModel)
63 | {
64 |     WriteOutput(responseModel); // always report first, whether or not the page is crawled
65 |     if (!responseModel.ShouldCrawl)
66 |         return;
67 |     CrawlForLinksInResponse(responseModel);
68 | }
69 |
70 | public void CrawlForLinksInResponse(IResponseModel responseModel)
71 | {
72 | var linksFoundInMarkup = MarkupHelpers.GetValidUrlListFromMarkup(responseModel.Markup, ValidUrlParser, CheckImages);
73 |
74 | foreach (var url in linksFoundInMarkup)
75 | {
76 | lock (UrlList)
77 | {
78 | if (UrlList.Where(l => l.Address == url).Count() > 0)
79 | continue;
80 |
81 | UrlList.Add(new LinkModel(url));
82 | }
83 | SendRequest(url, responseModel.RequestedUrl);
84 | }
85 | }
86 |
87 | public void WriteOutput(IResponseModel responseModel)
88 | {
89 | if (!responseModel.IsSuccess)
90 | {
91 | foreach (var output in Outputs)
92 | {
93 | output.WriteError(responseModel);
94 | }
95 | }
96 | else if (!OnlyReportBrokenLinksToOutput)
97 | {
98 | foreach (var output in Outputs)
99 | {
100 | output.WriteInfo(responseModel);
101 | }
102 | }
103 |
104 | CheckIfFinal(responseModel);
105 | }
106 |
107 | private void CheckIfFinal(IResponseModel responseModel)
108 | {
109 | lock (UrlList)
110 | {
111 |
112 | // First set the status code for the completed link (this will set "CheckingFinished" to true)
113 | foreach (LinkModel lm in UrlList.Where(l => l.Address == responseModel.RequestedUrl))
114 | {
115 | lm.StatusCode = responseModel.StatusCodeNumber;
116 | }
117 |
118 | // Then check to see whether there are any pending links left to check
119 | if ((UrlList.Count > 1) && (UrlList.Where(l => l.CheckingFinished == false).Count() == 0))
120 | {
121 | FinaliseSession();
122 | }
123 | }
124 | }
125 |
126 | private void FinaliseSession()
127 | {
128 | _timer.Stop();
129 | if (_settings.PrintSummary)
130 | {
131 | List messages = new List();
132 | messages.Add(""); // add blank line to differentiate summary from main output
133 |
134 | messages.Add("Processing complete. Checked " + UrlList.Count() + " links in " + _timer.ElapsedMilliseconds.ToString() + "ms");
135 |
136 | messages.Add("");
137 | messages.Add(" Status | # Links");
138 | messages.Add(" -------+--------");
139 |
140 | IEnumerable> StatusSummary = UrlList.GroupBy(link => link.StatusCode, link => link.Address);
141 | foreach(IGrouping statusGroup in StatusSummary)
142 | {
143 | messages.Add(String.Format(" {0} | {1,5}", statusGroup.Key, statusGroup.Count()));
144 | }
145 |
146 | foreach (var output in Outputs)
147 | {
148 | output.WriteInfo(messages.ToArray());
149 | }
150 | }
151 | }
152 | }
153 | }
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/LinkCrawler.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | AnyCPU
7 | {DB53303B-F9FB-4D77-B656-D05DB0420E6A}
8 | Exe
9 | Properties
10 | LinkCrawler
11 | LinkCrawler
12 | v4.5.2
13 | 512
14 | true
15 |
16 |
17 | AnyCPU
18 | true
19 | full
20 | false
21 | bin\Debug\
22 | DEBUG;TRACE
23 | prompt
24 | 4
25 |
26 |
27 | AnyCPU
28 | pdbonly
29 | true
30 | bin\Release\
31 | TRACE
32 | prompt
33 | 4
34 |
35 |
36 |
37 | ..\packages\Bootstrapper.2.0.3.13\lib\Net40\Bootstrapper.dll
38 | True
39 |
40 |
41 | ..\packages\Bootstrapper.StructureMap.2.0.3.13\lib\Net40\Bootstrapper.StructureMap.dll
42 | True
43 |
44 |
45 | ..\packages\CommonServiceLocator.StructureMapAdapter.Unofficial.3.0.4.125\lib\Net40\CommonServiceLocator.StructureMapAdapter.Unofficial.dll
46 | True
47 |
48 |
49 | ..\packages\HtmlAgilityPack.1.4.9\lib\Net45\HtmlAgilityPack.dll
50 | True
51 |
52 |
53 | ..\packages\CommonServiceLocator.1.3\lib\portable-net4+sl5+netcore45+wpa81+wp8\Microsoft.Practices.ServiceLocation.dll
54 | True
55 |
56 |
57 | ..\packages\RestSharp.105.2.3\lib\net452\RestSharp.dll
58 | True
59 |
60 |
61 | ..\packages\structuremap.3.0.4.125\lib\net40\StructureMap.dll
62 | True
63 |
64 |
65 | ..\packages\structuremap.3.0.4.125\lib\net40\StructureMap.Net4.dll
66 | True
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 | Designer
111 |
112 |
113 |
114 |
115 |
116 |
123 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Models/IResponseModel.cs:
--------------------------------------------------------------------------------
1 | using System.Net;
2 |
3 | namespace LinkCrawler.Models
4 | {
5 | public interface IResponseModel // Read-only result of one crawled URL, consumed by the crawler and the outputs.
6 | {
7 | string Markup { get; } // Body handed to the markup helper for link extraction.
8 | string RequestedUrl { get; } // URL that was requested.
9 | string ReferrerUrl { get; } // URL of the page the link was found on.
10 | HttpStatusCode StatusCode { get; }
11 | int StatusCodeNumber { get; } // Numeric status code; the crawler stores it on the matching LinkModel.
12 | bool IsSuccess { get; } // False routes the response to IOutput.WriteError.
13 | bool ShouldCrawl { get; } // True when the crawler should scan Markup for further links.
14 | string ToString(); // Text form printed by the console output.
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Models/LinkModel.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace LinkCrawler.Models
8 | {
9 |     /// <summary>
10 |     /// Tracks a single URL known to the crawler and whether a status code has been recorded for it.
11 |     /// </summary>
12 |     public class LinkModel
13 |     {
14 |         private int _code;
15 |
16 |         public LinkModel(string address)
17 |         {
18 |             CheckingFinished = false;
19 |             Address = address;
20 |         }
21 |
22 |         /// <summary>The URL this entry represents.</summary>
23 |         public string Address { get; private set; }
24 |
25 |         /// <summary>True once <see cref="StatusCode"/> has been assigned.</summary>
26 |         public bool CheckingFinished { get; private set; }
27 |
28 |         /// <summary>
29 |         /// Status code recorded for the URL. Assigning any value marks the link as checked.
30 |         /// </summary>
31 |         public int StatusCode
32 |         {
33 |             get { return _code; }
34 |             set
35 |             {
36 |                 CheckingFinished = true;
37 |                 _code = value;
38 |             }
39 |         }
40 |     }
41 | }
42 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Models/RequestModel.cs:
--------------------------------------------------------------------------------
1 | using RestSharp;
2 |
3 | namespace LinkCrawler.Models
4 | {
5 |     /// <summary>
6 |     /// Describes one outgoing request: the target URL, the page it was found on, and
7 |     /// whether the target lies under the crawl's base URL.
8 |     /// </summary>
9 |     public class RequestModel
10 |     {
11 |         public string Url;
12 |         public string ReferrerUrl;
13 |         public bool IsInternalUrl { get; set; }
14 |         public RestClient Client;
15 |
16 |         public RequestModel(string url, string referrerUrl, string baseUrl)
17 |         {
18 |             Url = url;
19 |             IsInternalUrl = IsUnderBaseUrl(url, baseUrl);
20 |             ReferrerUrl = referrerUrl;
21 |             Client = new RestClient(Url);
22 |         }
23 |
24 |         // True when url equals baseUrl or continues it at a path/query/fragment boundary.
25 |         // A bare prefix test would also accept look-alike hosts such as
26 |         // "https://example.com.evil.org" for the base "https://example.com".
27 |         private static bool IsUnderBaseUrl(string url, string baseUrl)
28 |         {
29 |             // Ordinal: URL comparison must not depend on the current culture.
30 |             if (!url.StartsWith(baseUrl, System.StringComparison.Ordinal))
31 |                 return false;
32 |
33 |             if (baseUrl.Length == 0 || url.Length == baseUrl.Length || baseUrl[baseUrl.Length - 1] == '/')
34 |                 return true;
35 |
36 |             var next = url[baseUrl.Length];
37 |             return next == '/' || next == '?' || next == '#';
38 |         }
39 |     }
40 | }
41 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Models/ResponseModel.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Extensions;
2 | using RestSharp;
3 | using System;
4 | using System.Net;
5 | using LinkCrawler.Utils.Settings;
6 |
7 | namespace LinkCrawler.Models
8 | {
9 |     /// <summary>
10 |     /// Result of one request: what was asked for, where the link came from, and how the server answered.
11 |     /// </summary>
12 |     public class ResponseModel : IResponseModel
13 |     {
14 |         public string Markup { get; }
15 |         public string RequestedUrl { get; }
16 |         public string ReferrerUrl { get; }
17 |         public HttpStatusCode StatusCode { get; }
18 |         public int StatusCodeNumber => (int)StatusCode;
19 |         public bool IsSuccess { get; }
20 |         public bool ShouldCrawl { get; }
21 |
22 |         /// <summary>
23 |         /// Captures URL, referrer and status; markup and the crawl flag are only filled in
24 |         /// for responses the settings classify as successful.
25 |         /// </summary>
26 |         public ResponseModel(IRestResponse restResponse, RequestModel requestModel, ISettings settings)
27 |         {
28 |             RequestedUrl = requestModel.Url;
29 |             ReferrerUrl = requestModel.ReferrerUrl;
30 |             StatusCode = restResponse.StatusCode;
31 |             IsSuccess = settings.IsSuccess(StatusCode);
32 |
33 |             if (IsSuccess)
34 |             {
35 |                 Markup = restResponse.Content;
36 |                 // Only internal HTML documents are followed for further links.
37 |                 ShouldCrawl = requestModel.IsInternalUrl && restResponse.IsHtmlDocument();
38 |             }
39 |         }
40 |
41 |         /// <summary>
42 |         /// Tab-separated code, status and url; failed responses add the referrer on a second line.
43 |         /// </summary>
44 |         public override string ToString()
45 |         {
46 |             return IsSuccess
47 |                 ? string.Format("{0}\t{1}\t{2}", StatusCodeNumber, StatusCode, RequestedUrl)
48 |                 : string.Format("{0}\t{1}\t{2}{3}\tReferer:\t{4}", StatusCodeNumber, StatusCode, RequestedUrl, Environment.NewLine, ReferrerUrl);
49 |         }
50 |     }
51 | }
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Program.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils;
2 | using StructureMap;
3 | using System;
4 | using LinkCrawler.Utils.Parsers;
5 | using LinkCrawler.Utils.Settings;
6 |
7 | namespace LinkCrawler
8 | {
9 |     class Program
10 |     {
11 |         /// <summary>
12 |         /// Builds the crawler from the container, optionally overrides the base URL with the
13 |         /// first command-line argument, starts the crawl and waits for a key press.
14 |         /// </summary>
15 |         static void Main(string[] args)
16 |         {
17 |             using (var container = Container.For<StructureMapRegistry>())
18 |             {
19 |                 var linkCrawler = container.GetInstance<LinkCrawler>();
20 |                 if (args.Length > 0)
21 |                     ApplyBaseUrlOverride(linkCrawler, args[0]);
22 |
23 |                 linkCrawler.Start();
24 |                 Console.Read();
25 |             }
26 |         }
27 |
28 |         // Validates the command-line URL and applies it to the crawler and to its URL parser,
29 |         // so root-relative links are resolved against the site being crawled rather than
30 |         // the configured one.
31 |         private static void ApplyBaseUrlOverride(LinkCrawler linkCrawler, string candidate)
32 |         {
33 |             string parsed;
34 |             var validUrlParser = new ValidUrlParser(new Settings());
35 |             if (!validUrlParser.Parse(candidate, out parsed))
36 |             {
37 |                 Console.Error.WriteLine("Ignoring invalid base url argument '{0}'; using the configured base url.", candidate);
38 |                 return;
39 |             }
40 |
41 |             // Same normalisation Settings applies to the configured base url.
42 |             linkCrawler.BaseUrl = parsed.TrimEnd('/');
43 |
44 |             Uri baseUri;
45 |             if (linkCrawler.ValidUrlParser != null
46 |                 && Uri.TryCreate(linkCrawler.BaseUrl, UriKind.Absolute, out baseUri))
47 |             {
48 |                 linkCrawler.ValidUrlParser.BaseUrl = baseUri.GetLeftPart(UriPartial.Authority);
49 |             }
50 |         }
51 |     }
52 | }
53 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("LinkCrawler")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("Making Waves")]
12 | [assembly: AssemblyProduct("LinkCrawler")]
13 | [assembly: AssemblyCopyright("Copyright © Making Waves 2016")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("db53303b-f9fb-4d77-b656-d05db0420e6a")]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Clients/ISlackClient.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Models;
2 |
3 | namespace LinkCrawler.Utils.Clients
4 | {
5 | public interface ISlackClient // Sends notifications about crawled responses to a Slack webhook.
6 | {
7 | string WebHookUrl { get; set; } // Webhook endpoint the notification is posted to.
8 | string BotName { get; set; } // SlackClient sends this as "username" in the payload.
9 | string BotIcon { get; set; } // SlackClient sends this as "icon_emoji" in the payload.
10 | string MessageFormat { get; set; } // Format string: {0} requested url, {1} status code number, {2} referrer url.
11 | bool HasWebHookUrl { get; } // In SlackClient: true when WebHookUrl is not null or empty.
12 | void NotifySlack(IResponseModel responseModel); // SlackClient does nothing when no webhook url is set.
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Clients/SlackClient.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Models;
2 | using LinkCrawler.Utils.Settings;
3 | using RestSharp;
4 |
5 | namespace LinkCrawler.Utils.Clients
6 | {
7 |     /// <summary>
8 |     /// Posts a message about a crawled response to a Slack incoming webhook.
9 |     /// </summary>
10 |     public class SlackClient : ISlackClient
11 |     {
12 |         public string WebHookUrl { get; set; }
13 |         public string BotName { get; set; }
14 |         public string BotIcon { get; set; }
15 |         public string MessageFormat { get; set; }
16 |
17 |         public bool HasWebHookUrl
18 |         {
19 |             get { return !string.IsNullOrEmpty(WebHookUrl); }
20 |         }
21 |
22 |         public SlackClient(ISettings settings)
23 |         {
24 |             WebHookUrl = settings.SlackWebHookUrl;
25 |             BotName = settings.SlackWebHookBotName;
26 |             BotIcon = settings.SlackWebHookBotIconEmoji;
27 |             MessageFormat = settings.SlackWebHookBotMessageFormat;
28 |         }
29 |
30 |         /// <summary>
31 |         /// Formats the message from the requested url, status code number and referrer url and
32 |         /// posts it as JSON to the webhook without waiting for the result.
33 |         /// Does nothing when no webhook url is configured.
34 |         /// </summary>
35 |         public void NotifySlack(IResponseModel responseModel)
36 |         {
37 |             if (!HasWebHookUrl)
38 |                 return;
39 |
40 |             var message = string.Format(MessageFormat, responseModel.RequestedUrl, responseModel.StatusCodeNumber, responseModel.ReferrerUrl);
41 |
42 |             var client = new RestClient(WebHookUrl);
43 |             var request = new RestRequest(Method.POST) { RequestFormat = DataFormat.Json };
44 |             request.AddBody(
45 |                 new
46 |                 {
47 |                     text = message,
48 |                     username = BotName,
49 |                     icon_emoji = BotIcon,
50 |                     mrkdwn = true
51 |                 });
52 |
53 |             // Fire-and-forget. A no-op callback is passed instead of null so the completion
54 |             // path always has a delegate to invoke.
55 |             client.ExecuteAsync(request, (response, handle) => { });
56 |         }
57 |     }
58 | }
59 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Extensions/HttpStatusCodeExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Linq;
3 | using System.Net;
4 | using System.Text.RegularExpressions;
5 |
6 | namespace LinkCrawler.Utils.Extensions
7 | {
8 |     public static class HttpStatusCodeExtensions
9 |     {
10 |         /// <summary>
11 |         /// Determines whether an HttpStatusCode represents a "success" based on the
12 |         /// configuration string you pass in.
13 |         /// You can pass literal codes like 100, 200, 404,
14 |         /// or simple patterns using "x" as a single-digit wildcard like: 1xx, xx4.
15 |         /// A pattern must match the whole code, so "2x" does not match 200.
16 |         /// </summary>
17 |         /// <param name="this">The HttpStatus code to use</param>
18 |         /// <param name="configuredCodes">CSV of HttpStatus codes</param>
19 |         /// <returns>True when the code matches at least one configured entry; false for a null or empty list.</returns>
20 |         public static bool IsSuccess(this HttpStatusCode @this, string configuredCodes)
21 |         {
22 |             if (string.IsNullOrEmpty(configuredCodes))
23 |                 return false;
24 |
25 |             var codeCollection = configuredCodes
26 |                 .Split(new[] { "," }, StringSplitOptions.RemoveEmptyEntries) // split into array
27 |                 .Select(c => c.Trim().ToLowerInvariant()) // allows for "100,20X" and "100, 20x"
28 |                 .ToList();
29 |
30 |             var codeNumberAsString = ((int) @this).ToString();
31 |
32 |             // test for simple exact matching
33 |             if (codeCollection.Contains(codeNumberAsString))
34 |                 return true;
35 |
36 |             // remaining candidates are the entries containing an "x" wildcard
37 |             return codeCollection
38 |                 .Where(c => c.Contains("x"))
39 |                 .Select(ToRegex)
40 |                 .Any(pattern => Regex.IsMatch(codeNumberAsString, pattern));
41 |         }
42 |
43 |         // Builds an anchored regex in which each "x" stands for exactly one digit. The entry is
44 |         // escaped first so stray regex metacharacters in the configuration are matched literally.
45 |         private static string ToRegex(string s)
46 |         {
47 |             return "^" + Regex.Escape(s).Replace("x", "[0-9]") + "$";
48 |         }
49 |     }
50 | }
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Extensions/RegexExtensions.cs:
--------------------------------------------------------------------------------
1 | using System.Text.RegularExpressions;
2 |
3 | namespace LinkCrawler.Utils.Extensions
4 | {
5 |     public static class RegexExtensions
6 |     {
7 |         /// <summary>
8 |         /// Returns true when <paramref name="regex"/> finds no match in <paramref name="str"/>.
9 |         /// </summary>
10 |         public static bool IsNotMatch(this Regex regex, string str) => !regex.IsMatch(str);
11 |     }
12 | }
13 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Extensions/RestRequestExtensions.cs:
--------------------------------------------------------------------------------
1 | using RestSharp;
2 |
3 | namespace LinkCrawler.Utils.Extensions
4 | {
5 |     public static class RestRequestExtensions
6 |     {
7 |         /// <summary>
8 |         /// Adds a header to the request and returns the same request so calls can be chained.
9 |         /// </summary>
10 |         /// <param name="restRequest">Request to modify.</param>
11 |         /// <param name="name">Header name.</param>
12 |         /// <param name="value">Header value.</param>
13 |         /// <returns>The <paramref name="restRequest"/> instance that was passed in.</returns>
14 |         public static RestRequest SetHeader(this RestRequest restRequest, string name, string value)
15 |         {
16 |             var target = restRequest;
17 |             target.AddHeader(name, value);
18 |             return target;
19 |         }
20 |     }
21 | }
22 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Extensions/RestResponseExtensions.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Settings;
2 | using RestSharp;
3 |
4 | namespace LinkCrawler.Utils.Extensions
5 | {
6 |     public static class RestResponseExtensions
7 |     {
8 |         /// <summary>
9 |         /// True when the response's content type starts with "text/html", ignoring case.
10 |         /// A response without a content type is not treated as an HTML document.
11 |         /// </summary>
12 |         public static bool IsHtmlDocument(this IRestResponse restResponse)
13 |         {
14 |             var contentType = restResponse.ContentType;
15 |
16 |             // Media type names are case-insensitive, and the comparison must not depend on culture.
17 |             return contentType != null
18 |                 && contentType.StartsWith(Constants.Response.ContentTypeTextHtml, System.StringComparison.OrdinalIgnoreCase);
19 |         }
20 |     }
21 | }
22 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Extensions/StringExtensions.cs:
--------------------------------------------------------------------------------
1 | using System.Globalization;
2 |
3 | namespace LinkCrawler.Utils.Extensions
4 | {
5 |     public static class StringExtensions
6 |     {
7 |         /// <summary>
8 |         /// Case-insensitive (invariant culture) prefix test that tolerates null and empty input:
9 |         /// two null-or-empty strings match, exactly one null-or-empty string does not.
10 |         /// </summary>
11 |         public static bool StartsWithIgnoreCase(this string str, string startsWith)
12 |         {
13 |             if (string.IsNullOrEmpty(str) && string.IsNullOrEmpty(startsWith))
14 |                 return true;
15 |
16 |             if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(startsWith))
17 |                 return false;
18 |
19 |             return str.StartsWith(startsWith, true, CultureInfo.InvariantCulture);
20 |         }
21 |
22 |         /// <summary>
23 |         /// Parses "true"/"false" (as accepted by bool.TryParse); anything else, including null, yields false.
24 |         /// </summary>
25 |         public static bool ToBool(this string str)
26 |         {
27 |             bool parsed;
28 |             bool.TryParse(str, out parsed);
29 |             return parsed;
30 |         }
31 |
32 |         /// <summary>
33 |         /// Removes one occurrence of <paramref name="suffixToRemove"/> from the end of
34 |         /// <paramref name="input"/>; returns the input unchanged when it does not end with the suffix
35 |         /// or when either argument is null.
36 |         /// </summary>
37 |         public static string TrimEnd(this string input, string suffixToRemove)
38 |         {
39 |             // Ordinal comparison keeps the match consistent with the character count removed below;
40 |             // a culture-sensitive match can succeed on strings of different length.
41 |             if (input != null && suffixToRemove != null
42 |                 && input.EndsWith(suffixToRemove, System.StringComparison.Ordinal))
43 |             {
44 |                 return input.Substring(0, input.Length - suffixToRemove.Length);
45 |             }
46 |             return input;
47 |         }
48 |     }
49 | }
50 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Extensions/UriExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace LinkCrawler.Utils.Extensions
4 | {
5 |     public static class UriExtensions
6 |     {
7 |         /// <summary>
8 |         /// Returns the scheme and authority of an absolute URI (for example "http://host:8080")
9 |         /// without path, query, fragment or trailing slash.
10 |         /// </summary>
11 |         /// <remarks>
12 |         /// GetLeftPart is used rather than trimming the path off the URI string, because that
13 |         /// string may also carry a query or fragment after the path.
14 |         /// </remarks>
15 |         public static string RemoveSegments(this Uri uri)
16 |         {
17 |             return uri.GetLeftPart(UriPartial.Authority);
18 |         }
19 |     }
20 | }
21 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Helpers/ConsoleHelper.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace LinkCrawler.Utils.Helpers
4 | {
5 |     public static class ConsoleHelper
6 |     {
7 |         /// <summary>
8 |         /// Writes <paramref name="text"/> to the console in red and restores the default colours afterwards.
9 |         /// </summary>
10 |         public static void WriteError(string text)
11 |         {
12 |             Console.ForegroundColor = ConsoleColor.Red;
13 |             try
14 |             {
15 |                 Console.WriteLine(text);
16 |             }
17 |             finally
18 |             {
19 |                 // Restore colours even if the write fails, so later output is not left red.
20 |                 Console.ResetColor();
21 |             }
22 |         }
23 |     }
24 | }
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Helpers/MarkupHelpers.cs:
--------------------------------------------------------------------------------
1 | using HtmlAgilityPack;
2 | using LinkCrawler.Utils.Parsers;
3 | using LinkCrawler.Utils.Settings;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 |
7 | namespace LinkCrawler.Utils.Helpers
8 | {
9 |     public static class MarkupHelpers
10 |     {
11 |         // Collects the given attribute from every node matching the XPath expression,
12 |         // with trailing '/' characters removed.
13 |         private static List<string> GetAllUrlsFromHtmlDocument(HtmlDocument htmlDoc, string searchPattern, string attribute)
14 |         {
15 |             var nodes = htmlDoc.DocumentNode.SelectNodes(searchPattern);
16 |
17 |             if (nodes == null)
18 |                 return new List<string>();
19 |
20 |             return nodes.Select(x => x.GetAttributeValue(attribute, string.Empty).TrimEnd('/')).ToList();
21 |         }
22 |
23 |         /// <summary>
24 |         /// Returns the href values of all anchors and, optionally, the src values of all images.
25 |         /// Null or empty markup yields an empty list.
26 |         /// </summary>
27 |         public static List<string> GetAllUrlsFromMarkup(string markup, bool checkImageTags)
28 |         {
29 |             if (string.IsNullOrEmpty(markup))
30 |                 return new List<string>();
31 |
32 |             // Parse once and run both queries against the same document.
33 |             var htmlDoc = new HtmlDocument();
34 |             htmlDoc.LoadHtml(markup);
35 |
36 |             var linkUrls = GetAllUrlsFromHtmlDocument(htmlDoc, Constants.Html.LinkSearchPattern, Constants.Html.Href);
37 |             if (checkImageTags)
38 |             {
39 |                 var imgUrls = GetAllUrlsFromHtmlDocument(htmlDoc, Constants.Html.ImgSearchPattern, Constants.Html.Src);
40 |                 linkUrls.AddRange(imgUrls);
41 |             }
42 |             return linkUrls;
43 |         }
44 |
45 |         /// <summary>
46 |         /// Gets a list of all urls in markup and tries to fix the urls that RestSharp will have a problem with
47 |         /// (i.e. relative urls, urls with no scheme, mailto links, etc.)
48 |         /// </summary>
49 |         /// <returns>List of urls that will work with RestSharp for sending http get</returns>
50 |         public static List<string> GetValidUrlListFromMarkup(string markup, IValidUrlParser parser, bool checkImages)
51 |         {
52 |             var validUrlList = new List<string>();
53 |
54 |             foreach (var url in GetAllUrlsFromMarkup(markup, checkImages))
55 |             {
56 |                 string validUrl;
57 |                 if (parser.Parse(url, out validUrl))
58 |                     validUrlList.Add(validUrl);
59 |             }
60 |             return validUrlList;
61 |         }
62 |     }
63 | }
64 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Outputs/ConsoleOutput.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using LinkCrawler.Models;
3 | using LinkCrawler.Utils.Helpers;
4 |
5 | namespace LinkCrawler.Utils.Outputs
6 | {
7 |     /// <summary>
8 |     /// Output that prints responses and summary lines to the console.
9 |     /// </summary>
10 |     public class ConsoleOutput : IOutput
11 |     {
12 |         /// <summary>Prints the response text through the console error helper.</summary>
13 |         public void WriteError(IResponseModel responseModel)
14 |         {
15 |             var text = responseModel.ToString();
16 |             ConsoleHelper.WriteError(text);
17 |         }
18 |
19 |         /// <summary>Prints the response text as a single line.</summary>
20 |         public void WriteInfo(IResponseModel responseModel)
21 |         {
22 |             var lines = new[] { responseModel.ToString() };
23 |             WriteInfo(lines);
24 |         }
25 |
26 |         /// <summary>Prints each entry on its own line, in order.</summary>
27 |         public void WriteInfo(String[] Info)
28 |         {
29 |             for (var i = 0; i < Info.Length; i++)
30 |             {
31 |                 Console.WriteLine(Info[i]);
32 |             }
33 |         }
34 |     }
35 | }
36 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Outputs/CsvOutput.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Models;
2 | using LinkCrawler.Utils.Settings;
3 | using System;
4 | using System.IO;
5 |
6 | namespace LinkCrawler.Utils.Outputs
7 | {
8 |     /// <summary>
9 |     /// Writes one delimited line per response to the CSV file named in the settings.
10 |     /// </summary>
11 |     public class CsvOutput : IOutput, IDisposable
12 |     {
13 |         private readonly ISettings _settings;
14 |         public TextWriter _writer;
15 |
16 |         public CsvOutput(ISettings settings)
17 |         {
18 |             _settings = settings;
19 |             Setup();
20 |         }
21 |
22 |         // Opens the file (truncating or appending per settings) and writes the header row
23 |         // whenever the file starts out empty, so a file newly created in append mode gets one too.
24 |         private void Setup()
25 |         {
26 |             var fileMode = _settings.CsvOverwrite ? FileMode.Create : FileMode.Append;
27 |             var file = new FileStream(_settings.CsvFilePath, fileMode, FileAccess.Write);
28 |
29 |             // Length is only available on seekable streams; otherwise fall back to the file mode.
30 |             var needsHeader = file.CanSeek ? file.Length == 0 : fileMode == FileMode.Create;
31 |
32 |             var streamWriter = new StreamWriter(file) {AutoFlush = true};
33 |             _writer = TextWriter.Synchronized(streamWriter);
34 |
35 |             if (needsHeader)
36 |             {
37 |                 _writer.WriteLine("Code{0}Status{0}Url{0}Referer", _settings.CsvDelimiter);
38 |             }
39 |         }
40 |
41 |         public void WriteError(IResponseModel responseModel)
42 |         {
43 |             Write(responseModel);
44 |         }
45 |
46 |         public void WriteInfo(IResponseModel responseModel)
47 |         {
48 |             Write(responseModel);
49 |         }
50 |
51 |         public void WriteInfo(String[] Info)
52 |         {
53 |             // Do nothing - string info is only for console
54 |         }
55 |
56 |         // Writes code, status, url and referrer separated by the configured delimiter.
57 |         // Skipped when the writer has been disposed.
58 |         private void Write(IResponseModel responseModel)
59 |         {
60 |             _writer?.WriteLine("{1}{0}{2}{0}{3}{0}{4}",
61 |                 _settings.CsvDelimiter,
62 |                 responseModel.StatusCodeNumber,
63 |                 responseModel.StatusCode,
64 |                 responseModel.RequestedUrl,
65 |                 responseModel.ReferrerUrl);
66 |         }
67 |
68 |         /// <summary>Closes the file. Writes issued afterwards are ignored.</summary>
69 |         public void Dispose()
70 |         {
71 |             var writer = _writer;
72 |             _writer = null;
73 |             writer?.Dispose();
74 |         }
75 |     }
76 | }
77 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Outputs/IOutput.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Models;
2 |
3 | namespace LinkCrawler.Utils.Outputs
4 | {
5 | public interface IOutput // Destination for crawl results; console, CSV and Slack implementations exist.
6 | {
7 | void WriteError(IResponseModel responseModel); // Called by the crawler for responses whose IsSuccess is false.
8 | void WriteInfo(IResponseModel responseModel); // Called for successful responses unless only broken links are reported.
9 | void WriteInfo(string[] InfoString); // Called with the end-of-crawl summary lines.
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Outputs/SlackOutput.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Models;
2 | using LinkCrawler.Utils.Clients;
3 |
4 | namespace LinkCrawler.Utils.Outputs
5 | {
6 |     /// <summary>
7 |     /// Output that forwards failed responses to Slack and ignores everything else.
8 |     /// </summary>
9 |     public class SlackOutput : IOutput
10 |     {
11 |         private readonly ISlackClient _slackClient;
12 |
13 |         public SlackOutput(ISlackClient slackClient)
14 |         {
15 |             _slackClient = slackClient;
16 |         }
17 |
18 |         /// <summary>Hands the failed response to the Slack client.</summary>
19 |         public void WriteError(IResponseModel responseModel) => _slackClient.NotifySlack(responseModel);
20 |
21 |         /// <summary>Intentionally empty: successful responses are not sent to Slack.</summary>
22 |         public void WriteInfo(IResponseModel responseModel)
23 |         {
24 |         }
25 |
26 |         /// <summary>Intentionally empty: summary lines are not sent to Slack.</summary>
27 |         public void WriteInfo(string[] Info)
28 |         {
29 |         }
30 |     }
31 | }
32 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Parsers/IValidUrlParser.cs:
--------------------------------------------------------------------------------
1 | using System.Text.RegularExpressions;
2 |
3 | namespace LinkCrawler.Utils.Parsers
4 | {
5 | public interface IValidUrlParser // Turns raw link values into URLs that can be requested.
6 | {
7 | Regex Regex { get; set; } // ValidUrlParser rejects urls that do not match this pattern.
8 | string BaseUrl { get; set; } // ValidUrlParser prepends this to urls starting with a single "/".
9 | bool Parse(string url, out string validUrl); // Returns false when the url cannot be used; ValidUrlParser then leaves validUrl empty.
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Parsers/ValidUrlParser.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Extensions;
2 | using LinkCrawler.Utils.Settings;
3 | using System;
4 | using System.Text.RegularExpressions;
5 |
6 | namespace LinkCrawler.Utils.Parsers
7 | {
8 |     /// <summary>
9 |     /// Filters raw link values with the configured regex and turns the accepted ones into
10 |     /// absolute URLs.
11 |     /// </summary>
12 |     public class ValidUrlParser : IValidUrlParser
13 |     {
14 |         public Regex Regex { get; set; }
15 |         public string BaseUrl { get; set; }
16 |
17 |         public ValidUrlParser(ISettings settings)
18 |         {
19 |             Regex = new Regex(settings.ValidUrlRegex);
20 |             BaseUrl = new Uri(settings.BaseUrl).RemoveSegments();
21 |         }
22 |
23 |         /// <summary>
24 |         /// Accepts absolute URLs as they are, prefixes "//host/..." with "http:" and
25 |         /// "/path" with <see cref="BaseUrl"/>; everything else is rejected.
26 |         /// </summary>
27 |         /// <param name="url">Raw link value.</param>
28 |         /// <param name="validUrl">The usable URL, or an empty string when the method returns false.</param>
29 |         /// <returns>True when a usable URL was produced.</returns>
30 |         public bool Parse(string url, out string validUrl)
31 |         {
32 |             validUrl = string.Empty;
33 |
34 |             if (string.IsNullOrEmpty(url) || Regex.IsNotMatch(url))
35 |                 return false;
36 |
37 |             Uri candidate;
38 |             if (!Uri.TryCreate(url, UriKind.RelativeOrAbsolute, out candidate))
39 |                 return false;
40 |
41 |             if (candidate.IsAbsoluteUri)
42 |             {
43 |                 validUrl = url;
44 |             }
45 |             else if (url.StartsWith("//"))
46 |             {
47 |                 // NOTE(review): protocol-relative links always get "http:", whatever the scheme of BaseUrl.
48 |                 validUrl = string.Concat("http:", url);
49 |             }
50 |             else if (url.StartsWith("/"))
51 |             {
52 |                 validUrl = string.Concat(BaseUrl, url);
53 |             }
54 |             else
55 |             {
56 |                 return false;
57 |             }
58 |
59 |             return true;
60 |         }
61 |     }
62 | }
63 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Settings/Constants.cs:
--------------------------------------------------------------------------------
1 | namespace LinkCrawler.Utils.Settings
2 | {
3 | public static class Constants // Shared string constants: configuration keys, response content types and HTML lookup values.
4 | {
5 | public static class AppSettings // Keys looked up in the application configuration.
6 | {
7 | public const string BaseUrl = "BaseUrl";
8 | public const string ValidUrlRegex = "ValidUrlRegex";
9 | public const string OnlyReportBrokenLinksToOutput = "OnlyReportBrokenLinksToOutput";
10 | public const string CheckImages = "CheckImages";
11 | public const string SlackWebHookUrl = "Slack.WebHook.Url";
12 | public const string SlackWebHookBotName = "Slack.WebHook.Bot.Name";
13 | public const string SlackWebHookBotIconEmoji = "Slack.WebHook.Bot.IconEmoji";
14 | public const string SlackWebHookBotMessageFormat = "Slack.WebHook.Bot.MessageFormat";
15 | public const string CsvFilePath = "Csv.FilePath";
16 | public const string CsvOverwrite = "Csv.Overwrite";
17 | public const string CsvDelimiter = "Csv.Delimiter";
18 | public const string SuccessHttpStatusCodes = "SuccessHttpStatusCodes"; // Comma-separated codes/patterns handed to HttpStatusCodeExtensions.IsSuccess.
19 | public const string OutputProviders = "outputProviders"; // Name of a configuration section (read via GetSection), not an appSettings key.
20 | public const string PrintSummary = "PrintSummary";
21 | }
22 |
23 | public static class Response
24 | {
25 | public const string ContentTypeTextHtml = "text/html"; // Prefix compared against the response content type to detect HTML.
26 | }
27 |
28 | public static class Html // Attribute names and XPath expressions used by MarkupHelpers.
29 | {
30 | public const string Href = "href";
31 | public const string Src = "src";
32 | public const string LinkSearchPattern = "//a[@href]"; // Anchors that carry an href attribute.
33 | public const string ImgSearchPattern = "//img[@src]"; // Images that carry a src attribute.
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Settings/ISettings.cs:
--------------------------------------------------------------------------------
1 | using System.Net;
2 |
3 | namespace LinkCrawler.Utils.Settings
4 | {
5 | public interface ISettings // Configuration consumed by the crawler, the url parser and the outputs.
6 | {
7 | string BaseUrl { get; } // Start url of the crawl; also used to decide whether a url is internal.
8 |
9 | string ValidUrlRegex { get; } // Pattern a raw link must match to be considered by ValidUrlParser.
10 |
11 | bool CheckImages { get; } // When true, img src values are checked in addition to anchor hrefs.
12 |
13 | bool OnlyReportBrokenLinksToOutput { get; } // When true, successful responses are not sent to the outputs.
14 |
15 | string SlackWebHookUrl { get; }
16 |
17 | string SlackWebHookBotName { get; }
18 |
19 | string SlackWebHookBotIconEmoji { get; }
20 |
21 | string SlackWebHookBotMessageFormat { get; } // Format string: {0} requested url, {1} status code number, {2} referrer url.
22 |
23 | string CsvFilePath { get; }
24 |
25 | bool CsvOverwrite { get; } // True: CsvOutput opens the file with FileMode.Create; false: FileMode.Append.
26 |
27 | string CsvDelimiter { get; }
28 |
29 | bool IsSuccess(HttpStatusCode statusCode); // Classifies a status code as success or failure.
30 |
31 | bool PrintSummary { get; } // When true, the crawler sends a summary to the outputs at the end of the session.
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Settings/MockSettings.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Extensions;
2 | using System.Net;
3 |
4 | namespace LinkCrawler.Utils.Settings
5 | {
6 |     /// <summary>
7 |     /// <see cref="ISettings"/> implementation with hard-coded values.
8 |     /// </summary>
9 |     public class MockSettings : ISettings
10 |     {
11 |         public MockSettings(bool includeWebHookUrl)
12 |         {
13 |             IncludeWebHookUrl = includeWebHookUrl;
14 |         }
15 |
16 |         // Decides whether SlackWebHookUrl returns the webhook address or an empty string.
17 |         private bool IncludeWebHookUrl { get; set; }
18 |
19 |         public string BaseUrl => "https://github.com";
20 |
21 |         public string ValidUrlRegex => @"(^http[s]?:\/{2})|(^www)|(^\/{1,2})";
22 |
23 |         public bool CheckImages => true;
24 |
25 |         public bool OnlyReportBrokenLinksToOutput => false;
26 |
27 |         public bool PrintSummary => false;
28 |
29 |         public string CsvFilePath => @"C:\tmp\output.csv";
30 |
31 |         public bool CsvOverwrite => true;
32 |
33 |         public string CsvDelimiter => ";";
34 |
35 |         public string SlackWebHookBotName => "Homer Bot";
36 |
37 |         public string SlackWebHookBotIconEmoji => ":homer:";
38 |
39 |         public string SlackWebHookBotMessageFormat => "*Doh! There is a link not working* Url: {0} Statuscode: {1} The link is placed on this page: {2}";
40 |
41 |         // NOTE(review): this looks like a real Slack webhook address committed to source control.
42 |         // Confirm it has been revoked, or replace it with a placeholder.
43 |         public string SlackWebHookUrl =>
44 |             IncludeWebHookUrl ? @"https://hooks.slack.com/services/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" : "";
45 |
46 |         /// <summary>Treats every 1xx, 2xx and 3xx status code as a success.</summary>
47 |         public bool IsSuccess(HttpStatusCode statusCode) => statusCode.IsSuccess("1xx,2xx,3xx");
48 |     }
49 | }
50 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/Settings/Settings.cs:
--------------------------------------------------------------------------------
1 | using LinkCrawler.Utils.Extensions;
2 | using System.Configuration;
3 | using System.Net;
4 |
5 | namespace LinkCrawler.Utils.Settings
6 | {
7 |     /// <summary>
8 |     /// <see cref="ISettings"/> implementation that reads every value from the application
9 |     /// configuration on each access.
10 |     /// </summary>
11 |     public class Settings : ISettings
12 |     {
13 |         public string BaseUrl => Read(Constants.AppSettings.BaseUrl).Trim('/');
14 |
15 |         public string ValidUrlRegex => Read(Constants.AppSettings.ValidUrlRegex);
16 |
17 |         public bool CheckImages => Read(Constants.AppSettings.CheckImages).ToBool();
18 |
19 |         public bool OnlyReportBrokenLinksToOutput => Read(Constants.AppSettings.OnlyReportBrokenLinksToOutput).ToBool();
20 |
21 |         public string SlackWebHookUrl => Read(Constants.AppSettings.SlackWebHookUrl);
22 |
23 |         public string SlackWebHookBotName => Read(Constants.AppSettings.SlackWebHookBotName);
24 |
25 |         public string SlackWebHookBotIconEmoji => Read(Constants.AppSettings.SlackWebHookBotIconEmoji);
26 |
27 |         public string SlackWebHookBotMessageFormat => Read(Constants.AppSettings.SlackWebHookBotMessageFormat);
28 |
29 |         public string CsvFilePath => Read(Constants.AppSettings.CsvFilePath);
30 |
31 |         public bool CsvOverwrite => Read(Constants.AppSettings.CsvOverwrite).ToBool();
32 |
33 |         public string CsvDelimiter => Read(Constants.AppSettings.CsvDelimiter);
34 |
35 |         public bool PrintSummary => Read(Constants.AppSettings.PrintSummary).ToBool();
36 |
37 |         /// <summary>
38 |         /// Checks the status code against the configured list of success codes; a missing
39 |         /// setting is treated as an empty list.
40 |         /// </summary>
41 |         public bool IsSuccess(HttpStatusCode statusCode)
42 |         {
43 |             var configuredCodes = Read(Constants.AppSettings.SuccessHttpStatusCodes) ?? "";
44 |             return statusCode.IsSuccess(configuredCodes);
45 |         }
46 |
47 |         // Returns the raw appSettings value for the key (null when the key is absent).
48 |         private static string Read(string key)
49 |         {
50 |             return ConfigurationManager.AppSettings[key];
51 |         }
52 |     }
53 | }
54 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/Utils/StructureMapRegistry.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections;
3 | using System.Configuration;
4 | using System.Linq;
5 | using LinkCrawler.Utils.Outputs;
6 | using LinkCrawler.Utils.Settings;
7 | using StructureMap.Configuration.DSL;
8 | using StructureMap.Graph;
9 |
namespace LinkCrawler.Utils
{
    /// <summary>
    /// StructureMap registry for the application. It scans the calling assembly
    /// with the default conventions and additionally registers every output
    /// provider listed in the configuration section named by
    /// <c>Constants.AppSettings.OutputProviders</c> as a named <see cref="IOutput"/> instance.
    /// </summary>
    public class StructureMapRegistry : Registry
    {
        /// <summary>
        /// Builds the registrations described on the class.
        /// </summary>
        /// <exception cref="ConfigurationErrorsException">
        /// A configured provider type cannot be resolved by <see cref="Type.GetType(string)"/>,
        /// or the resolved type does not implement <see cref="IOutput"/>.
        /// </exception>
        public StructureMapRegistry()
        {
            Scan(scan =>
            {
                scan.TheCallingAssembly();
                scan.WithDefaultConventions();
            });

            // The section is consumed only when it materializes as a Hashtable;
            // otherwise providers stays null and no named outputs are added.
            // A Hashtable enumerates as non-generic DictionaryEntry items, so the
            // element type has to be given explicitly to Cast before Key/Value
            // can be used in the lambdas below.
            var providers = (ConfigurationManager.GetSection(Constants.AppSettings.OutputProviders) as Hashtable)?
                .Cast<DictionaryEntry>()
                .ToDictionary(d => d.Key.ToString(), d => d.Value.ToString());

            if (providers != null)
            {
                var pluginType = typeof(IOutput);

                foreach (var provider in providers)
                {
                    // provider.Key is the registration name, provider.Value the type name to load.
                    var concreteType = Type.GetType(provider.Value);

                    if (concreteType == null)
                    {
                        throw new ConfigurationErrorsException(string.Format(
                            "Output provider '{0}' not found: {1}",
                            provider.Key,
                            provider.Value
                        ));
                    }

                    if (!concreteType.GetInterfaces().Contains(pluginType))
                    {
                        throw new ConfigurationErrorsException(string.Format(
                            "Output provider '{0}' does not implement IOutput: {1}",
                            provider.Key,
                            provider.Value
                        ));
                    }

                    For(pluginType).Add(concreteType).Named(provider.Key);
                }
            }
        }
    }
}
58 |
--------------------------------------------------------------------------------
/LinkCrawler/LinkCrawler/packages.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler.Test/LinkCrawler.Test.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net7.0
5 | enable
6 | enable
7 |
8 | false
9 |
10 |
11 |
12 |
13 |
14 |
15 | runtime; build; native; contentfiles; analyzers; buildtransitive
16 | all
17 |
18 |
19 | runtime; build; native; contentfiles; analyzers; buildtransitive
20 | all
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler.Test/UnitTest1.cs:
--------------------------------------------------------------------------------
namespace LinkCrawler.Test;

/// <summary>
/// Placeholder xUnit test class for the .NET 7 LinkCrawler project.
/// </summary>
public class UnitTest1
{
    /// <summary>
    /// Empty test case: it contains no statements or assertions.
    /// </summary>
    [Fact]
    public void Test1()
    {
    }
}
--------------------------------------------------------------------------------
/LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler.Test/Usings.cs:
--------------------------------------------------------------------------------
1 | global using Xunit;
--------------------------------------------------------------------------------
/LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.4.33103.184
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkCrawler", "LinkCrawler\LinkCrawler.csproj", "{DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Release|Any CPU = Release|Any CPU
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | {DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | {DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | {DD0C6451-84A7-4DA6-8BF5-32EB97BE32FE}.Release|Any CPU.Build.0 = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {0815F252-9C5A-42C9-A1CC-743850B55836}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler/LinkCrawler.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net7.0
6 | enable
7 | enable
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/LinkCrawler_DotNet_7/LinkCrawler/LinkCrawler/Program.cs:
--------------------------------------------------------------------------------
// See https://aka.ms/new-console-template for more information
// Top-level program: writes a fixed greeting line to the console.
const string greeting = "Hello, World!";
Console.WriteLine(greeting);
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LinkCrawler
2 | Simple C# console application that will crawl the given webpage for broken image-tags and hyperlinks. The result of this will be written to output. Right now we have these outputs: console, csv, slack.
3 |
4 |
5 | ## Why?
6 | Because it could be useful to know when a webpage you are responsible for displays broken links to its users. I have this running continuously, but you don't have to. For instance, after upgrading your CMS, changing the database schema, migrating content etc., it can be relevant to know whether this did or did not introduce broken links. Just run this tool one time and you will know exactly how many links are broken, where they link to, and where they are located.
7 |
8 | ## Build
9 | Clone repo :point_right: open solution in Visual Studio :point_right: build :facepunch:
10 |
11 | AppVeyor is used as CI, so when code is pushed to this repo the solution will get built and all tests will be run.
12 |
13 | | Branch | Build status |
14 | | :----- | :---------------------------------------|
15 | | develop | [](https://ci.appveyor.com/project/hmol/linkcrawler/branch/develop) |
16 | | master | [](https://ci.appveyor.com/project/hmol/linkcrawler/branch/master) |
17 |
18 | ## AppSettings
19 |
20 | | Key | Usage |
21 | | :-------------------------- | :---------------------------------------|
22 | | ```BaseUrl ``` | Base url for site to crawl |
23 | | ```SuccessHttpStatusCodes``` | HTTP status codes that are considered "successful". Example: "1xx,2xx,302,303" |
24 | | ```CheckImages``` | If true,
``` that controls what output should be used.
37 |
38 | ## Output to file
39 | ```LinkCrawler.exe >> crawl.log``` will save output to file.
40 | 
41 |
42 | ## Output to slack
43 | If configured correctly, the defined slack-webhook will be notified about broken links.
44 | 
45 |
46 | ## How I use it
47 | I have it running as a WebJob in Azure, scheduled every 4 days. It will notify the Slack channel where the editors of the website dwell.
48 |
49 | Creating a webjob is simple. Just put your compiled project files (/bin/) inside a .zip, and upload it.
50 | 
51 |
52 | Schedule it.
53 |
54 | 
55 |
56 | The output of a webjob is available because Azure saves it in log files.
57 | 
58 |
59 |
60 | Read more about Azure Webjobs: https://azure.microsoft.com/en-us/documentation/articles/web-sites-create-web-jobs/
61 |
62 | Read more about Slack incoming webhooks: https://api.slack.com/incoming-webhooks
63 |
--------------------------------------------------------------------------------