├── README.md
├── References
└── HtmlAgilityPack
│ └── HtmlAgilityPack.dll
├── Rhythm.Staticize
├── IBehavior.cs
├── StaticizeStepChangedEventArgs.cs
├── Validation
│ ├── IValidation.cs
│ ├── ValidationType.cs
│ ├── GenerationSuccessfulValidation.cs
│ ├── ValidationDelegateTaken.cs
│ ├── ValidationGroup.cs
│ ├── ValidationResult.cs
│ ├── ValidationExtensions.cs
│ ├── InternalALinkValidation.cs
│ ├── XPathValidation.cs
│ ├── ReferenceResourcesExistingValidation.cs
│ └── ValidationProjection.cs
├── Extensions
│ ├── CollectionExtensions.cs
│ ├── UrlExtensions.cs
│ └── IOExtensions.cs
├── ResourcesDownload
│ ├── JavascriptResourcesDownloadBehavior.cs
│ ├── CssResourcesDownloadBehavior.cs
│ ├── ImageResourcesDownloadBehavior.cs
│ ├── ResourcesDownloadException.cs
│ └── ResourcesDownloadBaseBehavior.cs
├── Properties
│ └── AssemblyInfo.cs
├── ResourcesManager.cs
├── StaticizeStep.cs
├── HtmlStaticizeContext.cs
├── IUriResourcesFromLocalFileSystemReslover.cs
├── Rhythm.Staticize.csproj
├── StaticizeStepStatus.cs
└── Staticizer.cs
├── Rhythm.Staticize.Tests
├── StaticizeCoreTest.cs
├── Properties
│ └── AssemblyInfo.cs
├── Rhythm.Staticize.Tests.csproj
└── StaticizeTest.cs
├── Staticize.sln
├── .gitattributes
└── .gitignore
/README.md:
--------------------------------------------------------------------------------
1 | # Staticize
2 |
3 |
4 |
--------------------------------------------------------------------------------
/References/HtmlAgilityPack/HtmlAgilityPack.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RockNHawk/Staticize/HEAD/References/HtmlAgilityPack/HtmlAgilityPack.dll
--------------------------------------------------------------------------------
/Rhythm.Staticize/IBehavior.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
8 | /// <summary>
9 | /// A behavior that runs after a page's HTML has been generated and loaded.
10 | /// Use an IBehavior to read the HTML document; behaviors run before validation.
11 | /// </summary>
12 | public interface IBehavior
13 | {
14 | void Process(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext context);
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/StaticizeStepChangedEventArgs.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
8 | /// <summary>
9 | /// Event arguments for a change of the key step of the staticize process.
10 | /// </summary>
11 | public class StaticizeStepChangedEventArgs : System.EventArgs
12 | {
13 | /// <summary>
14 | /// The key step of the staticize process that this event reports.
15 | /// </summary>
16 | public StaticizeStep Step { get; set; }
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/Rhythm.Staticize.Tests/StaticizeCoreTest.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using Microsoft.VisualStudio.TestTools.UnitTesting;
3 | using System.Collections.Generic;
4 | using System.Linq;
5 | using System.Text;
6 | using System.Threading.Tasks;
7 | using System.Web;
8 | using System.Net;
9 | using Rhythm.Staticize;
10 |
11 | namespace Rhythm.Staticize
12 | {
/// <summary>
/// Unit tests for the URL helper methods of <c>UrlExtensions</c>.
/// </summary>
[TestClass]
public class StaticizeCoreTest
{
    /// <summary>
    /// Checks that the directory path and the parent address are derived
    /// correctly from an absolute page URL.
    /// </summary>
    [TestMethod]
    public void UrlExtensionsTest()
    {
        var uri = new Uri("http://localhost/a/b/c/1.html");

        // Assert.AreEqual takes (expected, actual); keeping that order makes
        // the failure message report the right value as "expected".
        string parentDirectory = UrlExtensions.GetFileDirectory(uri);
        Assert.AreEqual("/a/b/c/", parentDirectory);

        string parentAddress = UrlExtensions.GetParent(uri);
        Assert.AreEqual("http://localhost/a/b/c/", parentAddress);
    }
}
28 | }
29 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/IValidation.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 | /// <summary>
10 | /// Represents a single validation rule.
11 | /// </summary>
12 | public interface IValidation
13 | {
14 | /// <summary>
15 | /// Runs the validation.
16 | /// </summary>
17 | /// <param name="document">The HtmlDocument being validated.</param>
18 | /// <returns>The error message to show when validation fails.</returns>
19 | string Validate(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext status);
20 |
21 | /// <summary>
22 | /// Gets the name of this validation (used for display in the user interface).
23 | /// </summary>
24 | string Name { get; }
25 |
26 | /// <summary>
27 | /// Gets the type (category) of this validation.
28 | /// </summary>
29 | ValidationType Type { get; }
30 | }
31 | }
--------------------------------------------------------------------------------
/Rhythm.Staticize/Extensions/CollectionExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
/// <summary>
/// Extension helpers for generic collections.
/// </summary>
public static class CollectionExtensions
{
    /// <summary>
    /// Appends every element of <paramref name="collection"/> to <paramref name="source"/>.
    /// </summary>
    /// <typeparam name="T">Element type of both collections.</typeparam>
    /// <param name="source">The list that receives the elements.</param>
    /// <param name="collection">The elements to append.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="collection"/> is null.
    /// </exception>
    public static void AddRange<T>(this IList<T> source, IEnumerable<T> collection)
    {
        // Extension methods can be invoked on a null receiver; fail with a
        // named argument instead of a NullReferenceException deep in the loop.
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (collection == null)
        {
            throw new ArgumentNullException("collection");
        }

        // List<T> has its own bulk AddRange; use it when available.
        List<T> list = source as List<T>;
        if (list != null)
        {
            list.AddRange(collection);
            return;
        }

        foreach (T item in collection)
        {
            source.Add(item);
        }
    }
}
30 | }
31 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/ResourcesDownload/JavascriptResourcesDownloadBehavior.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 | /// <summary>
10 | /// Provides download support for the Javascript resources referenced by a crawled page.
11 | /// </summary>
12 | public class JavascriptResourcesDownloadBehavior : ResourcesDownloadBaseBehavior
13 | {
14 | public JavascriptResourcesDownloadBehavior(String outputBaseDirectory, IUriResourcesFromLocalFileSystemReslover resourceFileReslover = null)
15 | : base(outputBaseDirectory, "//script[@src]", resourceFileReslover)
16 | {
17 | }
18 | // Lets the base class handle the parsed URIs first, then records them in the context's ReferenceJavascripts list.
19 | protected override void OnResourceParsed(Uri[] resourceUris, HtmlStaticizeContext context)
20 | {
21 | base.OnResourceParsed(resourceUris, context);
22 | context.Resources.ReferenceJavascripts.AddRange(resourceUris);
23 | }
24 |
25 |
26 | }
27 | }
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/ValidationType.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
8 | /// <summary>
9 | /// The type of a validation (tag integrity, link integrity, resource file integrity).
10 | /// </summary>
11 | public enum ValidationType
12 | {
13 | [System.ComponentModel.DataAnnotations.Display(Name = "其它")]
14 | Other, // Other: the display name above means "other".
15 | /// <summary>
16 | /// Tag integrity.
17 | /// </summary>
18 | [System.ComponentModel.DataAnnotations.Display(Name = "标签完整性")]
19 | Tag,
20 | /// <summary>
21 | /// Link integrity.
22 | /// </summary>
23 | [System.ComponentModel.DataAnnotations.Display(Name = "链接完整性")]
24 | Link,
25 | /// <summary>
26 | /// Resource file integrity.
27 | /// </summary>
28 | [System.ComponentModel.DataAnnotations.Display(Name = "资源文件完整性")]
29 | Resource,
30 | /// <summary>
31 | /// Content correctness.
32 | /// </summary>
33 | [System.ComponentModel.DataAnnotations.Display(Name = "内容正确性")]
34 | Content,
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General information about the assembly is controlled through the
6 | // following set of attributes. Change these attribute values to modify
7 | // the information associated with the assembly.
8 | [assembly: AssemblyTitle("Staticize")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("Deepst")]
12 | [assembly: AssemblyProduct("Staticize")]
13 | [assembly: AssemblyCopyright("Copyright © Deepst 2013")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly
18 | // invisible to COM components. If you need to access a type in this assembly from COM,
19 | // set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is the ID of the type library if this project is exposed to COM
23 | [assembly: Guid("023aa712-4366-47b8-b8c8-f7230b800240")]
24 |
25 | // Version information for the assembly consists of the following four values:
26 | //
27 | // Major version
28 | // Minor version
29 | // Build number
30 | // Revision
31 | //
32 | // You can specify all of these values, or use the defaults for the build number and revision
33 | // by using '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/Rhythm.Staticize.Tests/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General information about the assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify
7 | // the information associated with the assembly.
8 | [assembly: AssemblyTitle("Rhythm.Staticize.Tests")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("Microsoft")]
12 | [assembly: AssemblyProduct("Rhythm.Staticize.Tests")]
13 | [assembly: AssemblyCopyright("Copyright © Microsoft 2013")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly
18 | // invisible to COM components. If you need to access a type in this assembly from COM,
19 | // set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is the ID of the type library if this project is exposed to COM
23 | [assembly: Guid("8d3af334-e9bf-4b55-b18d-4a0db4fc3274")]
24 |
25 | // Version information for the assembly consists of the following four values:
26 | //
27 | // Major version
28 | // Minor version
29 | // Build number
30 | // Revision
31 | //
32 | // You can specify all of these values, or use the defaults for the build number and revision
33 | // by using '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/ResourcesManager.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
8 |
9 | /// <summary>
10 | /// Keeps track of all the resources referenced by a web page that is being staticized.
11 | /// </summary>
12 | public class ResourcesManager
13 | {
14 | /// <summary>
15 | /// Initializes a new instance of ResourcesManager with empty collections.
16 | /// </summary>
17 | public ResourcesManager()
18 | {
19 | ReferenceCsses = new List(3);
20 | ReferenceJavascripts = new List(3);
21 | ReferenceImages = new List(5);
22 | NotExistsFiles = new Dictionary();
23 | }
24 |
25 | /// <summary>
26 | /// Gets the CSS resources referenced by the page HTML.
27 | /// </summary>
28 | public IList ReferenceCsses { get; private set; }
29 |
30 | /// <summary>
31 | /// Gets the JS resources referenced by the page HTML.
32 | /// </summary>
33 | public IList ReferenceJavascripts { get; private set; }
34 |
35 | /// <summary>
36 | /// Gets the images referenced by the page HTML.
37 | /// </summary>
38 | public IList ReferenceImages { get; private set; }
39 |
40 | /// <summary>
41 | /// Gets the images that the page HTML references but that do not actually exist. NOTE(review): the property name suggests any file, not only images - confirm.
42 | /// </summary>
43 | public IDictionary NotExistsFiles { get; private set; }
44 | }
45 |
46 | }
--------------------------------------------------------------------------------
/Rhythm.Staticize/ResourcesDownload/CssResourcesDownloadBehavior.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
/// <summary>
/// Provides download support for the CSS resources referenced by a crawled page.
/// </summary>
public class CssResourcesDownloadBehavior : ResourcesDownloadBaseBehavior
{
    /// <summary>
    /// Initializes a new instance that passes the XPath for <c>link</c>
    /// elements with <c>rel='stylesheet'</c> to the base behavior.
    /// </summary>
    /// <param name="outputBaseDirectory">Forwarded unchanged to the base behavior.</param>
    /// <param name="resourceFileReslover">Optional resolver, forwarded unchanged to the base behavior.</param>
    public CssResourcesDownloadBehavior(String outputBaseDirectory, IUriResourcesFromLocalFileSystemReslover resourceFileReslover = null)
        : base(outputBaseDirectory, @"//link[@rel='stylesheet']", resourceFileReslover)
    {
    }

    /// <summary>
    /// Collects the non-blank <c>href</c> attribute values of the given nodes.
    /// Stylesheet links carry their address in <c>href</c>, not <c>src</c>.
    /// </summary>
    /// <param name="nodes">The matched nodes; may be null.</param>
    /// <returns>The non-blank <c>href</c> values, or an empty array when <paramref name="nodes"/> is null.</returns>
    protected override string[] GetSrcAttributes(HtmlAgilityPack.HtmlNodeCollection nodes)
    {
        // HtmlAgilityPack's SelectNodes returns null (not an empty collection)
        // when nothing matches. NOTE(review): the caller is not visible here;
        // this guard keeps a page without stylesheets from throwing in LINQ.
        if (nodes == null)
        {
            return new string[0];
        }

        return nodes
            .Select(node => node.GetAttributeValue("href", null))
            .Where(href => !String.IsNullOrWhiteSpace(href))
            .ToArray();
    }

    /// <summary>
    /// Lets the base class handle the parsed URIs, then records them in the
    /// context's <c>ReferenceCsses</c> list.
    /// </summary>
    /// <param name="resourceUris">The parsed resource URIs.</param>
    /// <param name="context">The context of the page being staticized.</param>
    protected override void OnResourceParsed(Uri[] resourceUris, HtmlStaticizeContext context)
    {
        base.OnResourceParsed(resourceUris, context);
        context.Resources.ReferenceCsses.AddRange(resourceUris);
    }
}
34 | }
35 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/ResourcesDownload/ImageResourcesDownloadBehavior.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 | /// <summary>
10 | /// Provides download support for the image resources referenced by a crawled page.
11 | /// </summary>
12 | public class ImageResourcesDownloadBehavior : ResourcesDownloadBaseBehavior
13 | {
14 | /// <summary>
15 | /// Initializes a new instance of ImageResourcesDownloadBehavior.
16 | /// </summary>
17 | /// <param name="outputBaseDirectory">The output folder for images.</param>
18 | /// <param name="resourceFileReslover">Optional resolver, forwarded to the base behavior.</param>
19 | public ImageResourcesDownloadBehavior(String outputBaseDirectory, IUriResourcesFromLocalFileSystemReslover resourceFileReslover = null)
20 | : base(outputBaseDirectory, "//img[@src]", resourceFileReslover)
21 | {
22 | }
23 |
24 | /// <summary>
25 | /// Callback invoked when the resource URLs have been parsed and are about to be downloaded. Records them in ReferenceImages before calling the base implementation.
26 | /// </summary>
27 | /// <param name="resourceUris">The resource URLs (a collection).</param>
28 | /// <param name="context">The context of the page being staticized.</param>
29 | protected override void OnResourceParsed(Uri[] resourceUris, HtmlStaticizeContext context)
30 | {
31 | context.Resources.ReferenceImages.AddRange(resourceUris);
32 | base.OnResourceParsed(resourceUris, context);
33 | }
34 |
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/GenerationSuccessfulValidation.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
/// <summary>
/// Validation that reports the errors recorded while a page's HTML was
/// generated and while its document tree was loaded.
/// </summary>
class GenerationSuccessfulValidation : IValidation
{
    /// <summary>
    /// Gets the shared instance of this validation.
    /// </summary>
    public static readonly IValidation Instance = new GenerationSuccessfulValidation();

    /// <summary>
    /// Gets the display name of this validation.
    /// </summary>
    string IValidation.Name
    {
        get { return "页面HTML是否成功生成。"; }
    }

    /// <summary>
    /// Gets the category of this validation; always <c>ValidationType.Tag</c>.
    /// </summary>
    public ValidationType Type
    {
        get { return ValidationType.Tag; }
    }

    /// <summary>
    /// Builds an error report from the generation and document-load errors
    /// stored in <paramref name="status"/>.
    /// </summary>
    /// <param name="document">Not used by this validation.</param>
    /// <param name="status">The context whose recorded errors are inspected.</param>
    /// <returns>Null when neither error is set; otherwise the combined error text.</returns>
    string IValidation.Validate(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext status)
    {
        Exception generationFailure = status.GenerationError;
        Exception loadFailure = status.DocumentLoadError;

        // Nothing recorded: validation passes.
        if (generationFailure == null && loadFailure == null)
        {
            return null;
        }

        var report = new StringBuilder();
        if (generationFailure != null)
        {
            report.AppendFormat("生成HTML期间发生错误:{0}\r\n{1}\r\n", generationFailure.Message, generationFailure.ToString());
        }
        if (loadFailure != null)
        {
            report.AppendFormat("加载HTML文档树期间发生错误:{0}\r\n{1}\r\n", loadFailure.Message, loadFailure.ToString());
        }
        return report.ToString();
    }
}
38 | }
39 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Extensions/UrlExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
/// <summary>
/// Helpers for deriving the containing directory of a page <see cref="Uri"/>.
/// </summary>
public static class UrlExtensions
{
    // Characters that end the path portion of a URI string.
    private static readonly char[] QueryOrFragmentStart = { '?', '#' };

    /// <summary>
    /// Gets the local path of the directory that contains the page the URI points to.
    /// Only page URIs are supported: the parent address must itself be an
    /// absolute URI, otherwise <see cref="Uri"/> construction throws.
    /// </summary>
    /// <param name="uri">The absolute URI of a page.</param>
    /// <returns>The directory path, e.g. "/a/b/c/" for "http://localhost/a/b/c/1.html".</returns>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
    public static string GetFileDirectory(this Uri uri)
    {
        // input:  http://xxx/a/xx.html
        // return: /a/
        var parentUri = new Uri(uri.GetParent());
        return parentUri.LocalPath;
    }

    /// <summary>
    /// Gets the address one level above the URI: everything up to and
    /// including the last '/' of its path.
    /// </summary>
    /// <param name="uri">The URI whose parent address is wanted.</param>
    /// <returns>
    /// The parent address, e.g. "http://xxx/" for "http://xxx/xx.html";
    /// the full URI string when its path contains no '/'.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
    public static string GetParent(this Uri uri)
    {
        if (uri == null)
        {
            throw new ArgumentNullException("uri");
        }

        String uriString = uri.ToString();

        // Only the path may be searched: a '/' inside the query or fragment
        // (e.g. "?back=/x/y") is not a directory boundary.
        int pathEnd = uriString.IndexOfAny(QueryOrFragmentStart);
        String pathPart = pathEnd == -1 ? uriString : uriString.Substring(0, pathEnd);

        int lastSlash = pathPart.LastIndexOf('/');
        if (lastSlash == -1)
        {
            return uriString;
        }
        return pathPart.Substring(0, lastSlash + 1);
    }
}
47 | }
48 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/ResourcesDownload/ResourcesDownloadException.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
/// <summary>
/// Exception describing a failure to download a resource; <see cref="Url"/>
/// can carry the address of the resource involved.
/// </summary>
[Serializable]
public class ResourcesDownloadException : System.Exception
{
    /// <summary>
    /// Initializes a new instance of this class.
    /// </summary>
    public ResourcesDownloadException()
    {
    }

    /// <summary>
    /// Initializes a new instance of this class with the specified error message.
    /// </summary>
    /// <param name="message">The message that explains the reason for the exception.</param>
    public ResourcesDownloadException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Initializes a new instance of this class with the specified error
    /// message and a reference to the inner exception that caused it.
    /// </summary>
    /// <param name="message">The message that explains the reason for the exception.</param>
    /// <param name="innerException">The exception that caused this one, or null if none.</param>
    public ResourcesDownloadException(string message, System.Exception innerException)
        : base(message, innerException)
    {
    }

    /// <summary>
    /// Initializes a new instance from serialized data, restoring <see cref="Url"/>.
    /// Without this constructor a [Serializable] exception cannot be deserialized.
    /// </summary>
    /// <param name="info">The serialized object data.</param>
    /// <param name="context">The source of the serialized stream.</param>
    protected ResourcesDownloadException(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
        : base(info, context)
    {
        string url = info.GetString("Url");
        if (url != null)
        {
            Url = new Uri(url, UriKind.RelativeOrAbsolute);
        }
    }

    /// <summary>
    /// Gets or sets the URL associated with this exception.
    /// </summary>
    public Uri Url { get; set; }

    /// <summary>
    /// Adds this exception's data, including <see cref="Url"/>, to the serialization info.
    /// </summary>
    /// <param name="info">The object that receives the serialized data.</param>
    /// <param name="context">The destination of the serialized stream.</param>
    /// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
    public override void GetObjectData(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        base.GetObjectData(info, context);

        // The entry is declared as a string, so store the string form; storing
        // the Uri object under typeof(string) makes GetString fail on read.
        string url = this.Url == null ? null : this.Url.OriginalString;
        info.AddValue("Url", url, typeof(string));
    }
}
52 | }
53 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/StaticizeStep.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
8 | /// <summary>
9 | /// Represents the key steps of the staticize process.
10 | /// </summary>
11 | public enum StaticizeStep
12 | {
13 | /// <summary>
14 | /// Initializing.
15 | /// </summary>
16 | [System.ComponentModel.DataAnnotations.Display(Name = "正在初始化")]
17 | Initialize,
18 | /// <summary>
19 | /// Generating HTML.
20 | /// </summary>
21 | [System.ComponentModel.DataAnnotations.Display(Name = "正在生成 HTML")]
22 | GenerationHtml,
23 | /// <summary>
24 | /// HTML generation completed.
25 | /// </summary>
26 | [System.ComponentModel.DataAnnotations.Display(Name = "HTML 生成完成")]
27 | GenerationHtmlCompleted,
28 | /// <summary>
29 | /// Validating.
30 | /// </summary>
31 | [System.ComponentModel.DataAnnotations.Display(Name = "正在验证")]
32 | Validation,
33 | /// <summary>
34 | /// Validation completed.
35 | /// </summary>
36 | [System.ComponentModel.DataAnnotations.Display(Name = "验证完成")]
37 | ValidationCompleted,
38 | /// <summary>
39 | /// Completed.
40 | /// </summary>
41 | [System.ComponentModel.DataAnnotations.Display(Name = "已完成")]
42 | Completed,
43 | /// <summary>
44 | /// The staticize process was terminated unexpectedly.
45 | /// It stopped while running, possibly because of a thread crash or a computer shutdown.
46 | /// </summary>
47 | [System.ComponentModel.DataAnnotations.Display(Name = "静态化过程被意外终止", Description = "静态化执行过程中意外停止了,可能是线程 Crash 或计算机关机造成的。")]
48 | Crashed,
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/HtmlStaticizeContext.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
8 | /// <summary>
9 | /// Context information for staticizing a web page.
10 | /// Provides storage for the state gathered while the page is crawled.
11 | /// Provides the list of error messages.
12 | /// </summary>
13 | ///
14 | public class HtmlStaticizeContext
15 | {
16 | internal Uri uri;
17 | internal String fileName;
18 | internal Exception generationError;
19 | internal IList validationResults;
20 |
21 | /// <summary>
22 | /// Initializes a new instance of HtmlStaticizeContext with a new ResourcesManager and an empty error list.
23 | /// </summary>
24 | public HtmlStaticizeContext()
25 | {
26 | Resources = new ResourcesManager();
27 | Errors = new List();
28 | }
29 |
30 | /// <summary>
31 | /// Gets the Uri of the web page.
32 | /// </summary>
33 | public Uri Uri { get { return uri; } }
34 |
35 | /// <summary>
36 | /// Gets the error that indicates whether generation failed.
37 | /// A non-null value means HTML generation failed.
38 | /// </summary>
39 | public System.Exception GenerationError { get { return generationError; } }
40 |
41 | /// <summary>
42 | /// Gets the error raised when loading the HTML failed (as with XmlDocument.LoadXml, a load failure means the HTML document is malformed).
43 | /// </summary>
44 | public System.Exception DocumentLoadError { get; internal set; }
45 |
46 | /// <summary>
47 | /// The errors recorded while this page was being staticized.
48 | /// Errors such as a 404 are recorded here.
49 | /// </summary>
50 | public IList Errors { get; internal set; }
51 |
52 | /// <summary>
53 | /// Information about the resources referenced by the web page.
54 | /// </summary>
55 | public ResourcesManager Resources { get; internal set; }
56 |
57 | /// <summary>
58 | /// The validation results (a collection) for this page after it was staticized.
59 | /// </summary>
60 | public IList ValidationResults { get { return validationResults; } }
61 | }
62 |
63 | }
64 |
--------------------------------------------------------------------------------
/Staticize.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 2012
4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Rhythm.Staticize.Tests", "Rhythm.Staticize.Tests\Rhythm.Staticize.Tests.csproj", "{BB81F20F-68E2-415B-B885-30DEA4B7CD4D}"
5 | EndProject
6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "References", "References", "{9210C7F1-32B1-4C5B-8D9E-F38C06E96AE7}"
7 | EndProject
8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "HtmlAgilityPack", "HtmlAgilityPack", "{9B4B0CB9-A03F-4680-AB08-150C30F07AF0}"
9 | ProjectSection(SolutionItems) = preProject
10 | References\HtmlAgilityPack\HtmlAgilityPack.dll = References\HtmlAgilityPack\HtmlAgilityPack.dll
11 | References\HtmlAgilityPack\HtmlAgilityPack.XML = References\HtmlAgilityPack\HtmlAgilityPack.XML
12 | EndProjectSection
13 | EndProject
14 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Rhythm.Staticize", "Rhythm.Staticize\Rhythm.Staticize.csproj", "{C1EBD5EC-6861-4C79-A954-6B58EC944FF8}"
15 | EndProject
16 | Global
17 |
18 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
19 | Debug|Any CPU = Debug|Any CPU
20 | Release|Any CPU = Release|Any CPU
21 | EndGlobalSection
22 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
23 | {BB81F20F-68E2-415B-B885-30DEA4B7CD4D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
24 | {BB81F20F-68E2-415B-B885-30DEA4B7CD4D}.Debug|Any CPU.Build.0 = Debug|Any CPU
25 | {BB81F20F-68E2-415B-B885-30DEA4B7CD4D}.Release|Any CPU.ActiveCfg = Release|Any CPU
26 | {BB81F20F-68E2-415B-B885-30DEA4B7CD4D}.Release|Any CPU.Build.0 = Release|Any CPU
27 | {C1EBD5EC-6861-4C79-A954-6B58EC944FF8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
28 | {C1EBD5EC-6861-4C79-A954-6B58EC944FF8}.Debug|Any CPU.Build.0 = Debug|Any CPU
29 | {C1EBD5EC-6861-4C79-A954-6B58EC944FF8}.Release|Any CPU.ActiveCfg = Release|Any CPU
30 | {C1EBD5EC-6861-4C79-A954-6B58EC944FF8}.Release|Any CPU.Build.0 = Release|Any CPU
31 | EndGlobalSection
32 | GlobalSection(SolutionProperties) = preSolution
33 | HideSolutionNode = FALSE
34 | EndGlobalSection
35 | GlobalSection(NestedProjects) = preSolution
36 | {9B4B0CB9-A03F-4680-AB08-150C30F07AF0} = {9210C7F1-32B1-4C5B-8D9E-F38C06E96AE7}
37 | EndGlobalSection
38 | EndGlobal
39 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Extensions/IOExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
/// <summary>
/// Helpers for working with directory separators ('/' and '\') in path strings.
/// </summary>
public static class IOExtensions
{
    // Both separator styles are accepted everywhere in this class.
    private static readonly char[] DirectorySeparators = { '/', '\\' };

    /// <summary>
    /// Tells whether the character is a forward slash or a backslash.
    /// </summary>
    /// <param name="chr">The character to test.</param>
    /// <returns>True when <paramref name="chr"/> is '/' or '\'.</returns>
    public static bool IsDirectorySeparator(this char chr)
    {
        return chr == '/' || chr == '\\';
    }

    /// <summary>
    /// Finds the position of the first directory separator in the path.
    /// </summary>
    /// <param name="path">The path to search; may be null.</param>
    /// <returns>
    /// The zero-based index of the first '/' or '\', or -1 when the path is
    /// null or contains neither.
    /// </returns>
    public static int IndexOfDirectorySeparator(this string path)
    {
        if (path == null)
        {
            return -1;
        }
        // Taking the larger of the two individual IndexOf results would yield
        // the LATER separator whenever both kinds occur; IndexOfAny returns
        // the first occurrence of either.
        return path.IndexOfAny(DirectorySeparators);
    }

    /// <summary>
    /// Removes all leading slashes, forward or backward.
    /// </summary>
    /// <param name="path">The path to trim; may be null.</param>
    /// <returns>The path without leading separators, or null when <paramref name="path"/> is null.</returns>
    public static string RemoveStartDirectorySeparator(this string path)
    {
        if (path == null)
        {
            return null;
        }
        return path.TrimStart(DirectorySeparators);
    }

    /// <summary>
    /// Removes all trailing slashes, forward or backward.
    /// </summary>
    /// <param name="path">The path to trim; may be null.</param>
    /// <returns>The path without trailing separators, or null when <paramref name="path"/> is null.</returns>
    public static string RemoveLastDirectorySeparator(this string path)
    {
        if (path == null)
        {
            return null;
        }
        return path.TrimEnd(DirectorySeparators);
    }
}
86 | }
87 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/ValidationDelegateTaken.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 | /// <summary>
10 | /// Defines a validation rule for an HtmlDocument that is backed by a delegate.
11 | /// </summary>
12 | public class ValidationDelegateTaken : IValidation
13 | {
14 | String errorMessage;
15 | System.Func documentValidation;
16 |
17 | /// <summary>
18 | /// Initializes a new instance of ValidationDelegateTaken.
19 | /// </summary>
20 | /// <param name="name">The name of this validation (used for display in the user interface).</param>
21 | /// <param name="documentValidation">A delegate that checks whether the HtmlDocument satisfies the rule.</param>
22 | /// <param name="errorMessage">The message to report when the document does not satisfy the rule.</param>
23 | public ValidationDelegateTaken(String name, ValidationType validationType, System.Func documentValidation, String errorMessage)
24 | {
25 | this.documentValidation = documentValidation;
26 | this.errorMessage = errorMessage;
27 | this.Name = name;
28 | this.Type = validationType;
29 | if (documentValidation == null)
30 | {
31 | throw new ArgumentNullException("documentValidation");
32 | }
33 | if (errorMessage == null)
34 | {
35 | throw new ArgumentNullException("errorMessage");
36 | }
37 | }
38 |
39 | /// <summary>
40 | /// Runs the validation by invoking the delegate on the document.
41 | /// </summary>
42 | /// <param name="document">The HtmlDocument being validated; must not be null.</param>
43 | /// <returns>Null when the delegate returns true (validation passed); otherwise the configured error message.</returns>
44 | public string Validate(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext status)
45 | {
46 | if (document == null)
47 | {
48 | throw new ArgumentNullException("document");
49 | }
50 | return documentValidation.Invoke(document) ? null : errorMessage;
51 | }
52 |
53 | /// <summary>
54 | /// Gets the error message reported when validation fails.
55 | /// </summary>
56 | public String ErrorMessage { get { return errorMessage; } }
57 |
58 | /// <summary>
59 | /// Gets the name of this validation (used for display in the user interface).
60 | /// </summary>
61 | public String Name { get; private set; }
62 |
63 | public ValidationType Type { get; set; }
64 |
65 | public override string ToString()
66 | {
67 | return this.Name;
68 | }
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/IUriResourcesFromLocalFileSystemReslover.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
/// <summary>
/// Defines support for mapping a resource Uri to a file on the local file system.
/// </summary>
public interface IUriResourcesFromLocalFileSystemReslover
{
    /// <summary>
    /// Converts a Uri to a local path.
    /// </summary>
    /// <param name="uri">The link of a resource.</param>
    /// <returns>The local path of that resource.</returns>
    String ResloveLocalPath(Uri uri);

    /// <summary>
    /// When the site being staticized and the staticizer run on the same
    /// computer, tries to copy the file directly from the local disk.
    /// </summary>
    /// <param name="uri">The link of the resource to copy.</param>
    /// <param name="saveFilePath">The destination file path.</param>
    /// <returns>Whether the copy succeeded.</returns>
    Boolean TryCopyFromLocal(Uri uri, String saveFilePath);
}

/// <summary>
/// Default implementation of <see cref="IUriResourcesFromLocalFileSystemReslover"/>;
/// resolves resources relative to the application's base directory.
/// </summary>
public class DefaultUriToLocalFilePathReslover : IUriResourcesFromLocalFileSystemReslover
{
    /// <summary>
    /// Gets the shared instance of DefaultUriToLocalFilePathReslover.
    /// </summary>
    public static readonly IUriResourcesFromLocalFileSystemReslover Instance = new DefaultUriToLocalFilePathReslover();

    // Both separator styles may lead a path taken from a Uri.
    static readonly char[] DirectorySeparators = { '/', '\\' };

    readonly String baseDir = AppDomain.CurrentDomain.BaseDirectory;

    /// <summary>
    /// Converts a Uri to a local path without leading directory separators.
    /// </summary>
    /// <param name="uri">The link of a resource.</param>
    /// <returns>The local path of that resource, e.g. "css/site.css".</returns>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
    public string ResloveLocalPath(Uri uri)
    {
        return FormatUriToLocalPath(uri);
    }

    /// <summary>
    /// Takes the path of an absolute Uri, or the text of a relative one, and
    /// strips the leading directory separators.
    /// </summary>
    static string FormatUriToLocalPath(Uri uri)
    {
        if (uri == null)
        {
            throw new ArgumentNullException("uri");
        }
        String localPath = uri.IsAbsoluteUri ? uri.LocalPath : uri.ToString();

        // TrimStart is safe on an empty path (indexing [0] is not) and removes
        // every leading separator; a path that still started with one would be
        // treated as rooted by Path.Combine, which would then ignore baseDir.
        return localPath.TrimStart(DirectorySeparators);
    }

    /// <summary>
    /// When the site being staticized and the staticizer run on the same
    /// computer, tries to copy the file directly from the local disk.
    /// </summary>
    /// <param name="uri">The link of the resource to copy.</param>
    /// <param name="saveFilePath">The destination file path.</param>
    /// <returns>
    /// True when the file exists under the base directory and was copied;
    /// false in every other case, including invalid arguments.
    /// </returns>
    public bool TryCopyFromLocal(Uri uri, string saveFilePath)
    {
        if (uri == null || saveFilePath == null)
        {
            return false;
        }
        try
        {
            // Path.Combine can itself throw on invalid path characters, so it
            // belongs inside the best-effort region of this Try-method.
            String resourceFilePath = System.IO.Path.Combine(baseDir, FormatUriToLocalPath(uri));
            if (!System.IO.File.Exists(resourceFilePath))
            {
                return false;
            }
            System.IO.File.Copy(resourceFilePath, saveFilePath);
            return true;
        }
        catch (Exception)
        {
            // Deliberate best effort: any failure means "could not copy locally".
            return false;
        }
    }
}
83 |
84 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 |
4 | # User-specific files
5 | *.suo
6 | *.user
7 | *.sln.docstates
8 |
9 | # Build results
10 |
11 | [Dd]ebug/
12 | [Rr]elease/
13 | x64/
14 | build/
15 | [Bb]in/
16 | [Oo]bj/
17 |
18 | # Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
19 | !packages/*/build/
20 |
21 | # MSTest test Results
22 | [Tt]est[Rr]esult*/
23 | [Bb]uild[Ll]og.*
24 |
25 | *_i.c
26 | *_p.c
27 | *.ilk
28 | *.meta
29 | *.obj
30 | *.pch
31 | *.pdb
32 | *.pgc
33 | *.pgd
34 | *.rsp
35 | *.sbr
36 | *.tlb
37 | *.tli
38 | *.tlh
39 | *.tmp
40 | *.tmp_proj
41 | *.log
42 | *.vspscc
43 | *.vssscc
44 | .builds
45 | *.pidb
46 | *.log
47 | *.scc
48 |
49 | # Visual C++ cache files
50 | ipch/
51 | *.aps
52 | *.ncb
53 | *.opensdf
54 | *.sdf
55 | *.cachefile
56 |
57 | # Visual Studio profiler
58 | *.psess
59 | *.vsp
60 | *.vspx
61 |
62 | # Guidance Automation Toolkit
63 | *.gpState
64 |
65 | # ReSharper is a .NET coding add-in
66 | _ReSharper*/
67 | *.[Rr]e[Ss]harper
68 |
69 | # TeamCity is a build add-in
70 | _TeamCity*
71 |
72 | # DotCover is a Code Coverage Tool
73 | *.dotCover
74 |
75 | # NCrunch
76 | *.ncrunch*
77 | .*crunch*.local.xml
78 |
79 | # Installshield output folder
80 | [Ee]xpress/
81 |
82 | # DocProject is a documentation generator add-in
83 | DocProject/buildhelp/
84 | DocProject/Help/*.HxT
85 | DocProject/Help/*.HxC
86 | DocProject/Help/*.hhc
87 | DocProject/Help/*.hhk
88 | DocProject/Help/*.hhp
89 | DocProject/Help/Html2
90 | DocProject/Help/html
91 |
92 | # Click-Once directory
93 | publish/
94 |
95 | # Publish Web Output
96 | *.Publish.xml
97 |
98 | # NuGet Packages Directory
99 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line
100 | #packages/
101 |
102 | # Windows Azure Build Output
103 | csx
104 | *.build.csdef
105 |
106 | # Windows Store app package directory
107 | AppPackages/
108 |
109 | # Others
110 | sql/
111 | *.Cache
112 | ClientBin/
113 | [Ss]tyle[Cc]op.*
114 | ~$*
115 | *~
116 | *.dbmdl
117 | *.[Pp]ublish.xml
118 | *.pfx
119 | *.publishsettings
120 |
121 | # RIA/Silverlight projects
122 | Generated_Code/
123 |
124 | # Backup & report files from converting an old project file to a newer
125 | # Visual Studio version. Backup files are not needed, because we have git ;-)
126 | _UpgradeReport_Files/
127 | Backup*/
128 | UpgradeLog*.XML
129 | UpgradeLog*.htm
130 |
131 | # SQL Server files
132 | App_Data/*.mdf
133 | App_Data/*.ldf
134 |
135 |
136 | #LightSwitch generated files
137 | GeneratedArtifacts/
138 | _Pvt_Extensions/
139 | ModelManifest.xml
140 |
141 | # =========================
142 | # Windows detritus
143 | # =========================
144 |
145 | # Windows image file caches
146 | Thumbs.db
147 | ehthumbs.db
148 |
149 | # Folder config file
150 | Desktop.ini
151 |
152 | # Recycle Bin used on file shares
153 | $RECYCLE.BIN/
154 |
155 | # Mac desktop service store files
156 | .DS_Store
157 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/ValidationGroup.cs:
--------------------------------------------------------------------------------
1 | //using System;
2 | //using System.Collections.Generic;
3 | //using System.Linq;
4 | //using System.Text;
5 | //using System.Threading.Tasks;
6 |
7 | //namespace Rhythm.Staticize
8 | //{
9 | // ///
10 | // /// 提供对被爬行页面的 HtmlDocument 验证支持。
11 | // ///
12 | // [Obsolete ]
13 | // public class ValidationGroup
14 | // {
15 | // List m_Validations;
16 |
17 | // ///
18 | // /// 使用lambda表达式或委托创建验证规则。
19 | // ///
20 | // /// 一个委托,用于验证 HtmlDocument 是否符合规则,返回值为Boolean。
21 | // /// 验证不符合规则时的提示信息。
22 | // public ValidationGroup Add(String name, System.Func documentValidation, String errorMessage)
23 | // {
24 | // if (documentValidation == null)
25 | // {
26 | // throw new ArgumentNullException("documentValidation");
27 | // }
28 | // if (errorMessage == null)
29 | // {
30 | // throw new ArgumentNullException("errorMessage");
31 | // }
32 | // if (m_Validations == null)
33 | // {
34 | // m_Validations = new List();
35 | // }
36 | // m_Validations.Add(new ValidationDelegateTaken(name, documentValidation: documentValidation, errorMessage: errorMessage));
37 | // return this;
38 | // }
39 |
40 | // ///
41 | // /// 添加自定义验证规则。
42 | // ///
43 | // /// 自定义验证规则。
44 | // public ValidationGroup Add(params IValidation[] validations)
45 | // {
46 | // if (validations == null)
47 | // {
48 | // throw new ArgumentNullException("validations");
49 | // }
50 | // if (m_Validations == null)
51 | // {
52 | // m_Validations = new List();
53 | // }
54 | // m_Validations.AddRange(validations);
55 | // return this;
56 | // }
57 |
58 |
59 | // ///
60 | // /// 执行所有 HTML 检查
61 | // ///
62 | // ///
63 | // /// HTML 检查结果。
64 | // public ValidationResult Validate(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext status)
65 | // {
66 | // return m_Validations == null ? null : m_Validations.Validate(document, status);
67 | // }
68 |
69 | // public override string ToString()
70 | // {
71 | // if (m_Validations != null)
72 | // {
73 | // StringBuilder builder = new StringBuilder();
74 | // for (int i = 0; i < this.m_Validations.Count; i++)
75 | // {
76 | // builder.AppendFormat("{0},", m_Validations[i].ToString());
77 | // }
78 | // return builder.ToString();
79 | // }
80 | // return base.ToString();
81 | // }
82 |
83 | // public IList Validations
84 | // {
85 | // get { return m_Validations; }
86 | // }
87 |
88 | // }
89 | //}
90 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/ValidationResult.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 |     /// <summary>
10 |     /// Result produced by validating a page after it has been staticized.
11 |     /// </summary>
12 |     public class ValidationResult
13 |     {
14 |         /// <summary>
15 |         /// Creates a result with every property left at its default value.
16 |         /// </summary>
17 |         public ValidationResult()
18 |         {
19 |         }
20 |
21 |         /// <summary>
22 |         /// Numeric identifier of the result; this class never assigns it.
23 |         /// </summary>
24 |         public int Id { get; set; }
25 |
26 |         /// <summary>
27 |         /// Address of the validated page (absolute).
28 |         /// </summary>
29 |         public Uri Uri { get; set; }
30 |
31 |         /// <summary>
32 |         /// Category of the validation that produced this result.
33 |         /// </summary>
34 |         public virtual ValidationType ValidationType { get; set; }
35 |
36 |         /// <summary>
37 |         /// Short title of the validation, meant to be shown in the user interface.
38 |         /// </summary>
39 |         public virtual string Name { get; set; }
40 |
41 |         /// <summary>
42 |         /// Outcome text of the validation, meant to be shown in the user interface.
43 |         /// </summary>
44 |         public virtual string Message { get; set; }
45 |
46 |         /// <summary>
47 |         /// Detailed exception information for validations that failed by throwing.
48 |         /// </summary>
49 |         public virtual System.Exception Exception { get; set; }
50 |
51 |         /// <summary>
52 |         /// Formats the failure as text; yields an empty string when <see cref="Message"/> is null or empty.
53 |         /// </summary>
54 |         public override string ToString()
55 |         {
56 |             return string.IsNullOrEmpty(Message)
57 |                 ? ""
58 |                 : string.Format("验证[{0}]不通过:\r\n{1}\r\n", Name, Message);
59 |         }
60 |     }
104 | }
105 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/ValidationExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 |     /// <summary>
10 |     /// Extension methods that run validations, gather their results and write them to a file.
11 |     /// </summary>
12 |     public static class ValidationExtensions
13 |     {
14 |         /// <summary>
15 |         /// Runs every validation against an HTML document and collects the ones that failed.
16 |         /// </summary>
17 |         /// <param name="validations">The validations to run; may be null.</param>
18 |         /// <param name="document">The parsed HTML document to check.</param>
19 |         /// <param name="context">The staticize context of the page; its Uri is stored on every result.</param>
20 |         /// <returns>
21 |         /// One result for each validation that returned a non-empty message, or null when
22 |         /// <paramref name="validations"/> is null (kept for callers that test for null).
23 |         /// </returns>
24 |         /// <exception cref="ArgumentNullException"><paramref name="document"/> or <paramref name="context"/> is null.</exception>
25 |         public static IList<ValidationResult> Validate(this IEnumerable<IValidation> validations, HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext context)
26 |         {
27 |             if (document == null)
28 |             {
29 |                 throw new ArgumentNullException("document");
30 |             }
31 |             if (context == null)
32 |             {
33 |                 throw new ArgumentNullException("context");
34 |             }
35 |             if (validations == null)
36 |             {
37 |                 return null;
38 |             }
39 |             var failures = new List<ValidationResult>();
40 |             foreach (var validation in validations)
41 |             {
42 |                 // A validation signals success by returning null or an empty message.
43 |                 var errorMessage = validation.Validate(document, context);
44 |                 if (string.IsNullOrEmpty(errorMessage))
45 |                 {
46 |                     continue;
47 |                 }
48 |                 failures.Add(new ValidationResult
49 |                 {
50 |                     Uri = context.Uri,
51 |                     ValidationType = validation.Type,
52 |                     Name = validation.Name,
53 |                     Message = errorMessage,
54 |                 });
55 |             }
56 |             return failures;
57 |         }
58 |
59 |         /// <summary>
60 |         /// Flattens the validation results of several page contexts into one sequence.
61 |         /// </summary>
62 |         /// <param name="staticizeContext">The page contexts to read results from; null is treated as empty.</param>
63 |         /// <returns>All results of all contexts, in enumeration order; never null.</returns>
64 |         public static IEnumerable<ValidationResult> GetValidationResults(this IEnumerable<HtmlStaticizeContext> staticizeContext)
65 |         {
66 |             var all = new List<ValidationResult>();
67 |             if (staticizeContext == null)
68 |             {
69 |                 return all;
70 |             }
71 |             foreach (var item in staticizeContext)
72 |             {
73 |                 // Read the property once and let AddRange handle the empty case, so a lazily
74 |                 // evaluated sequence is enumerated a single time instead of Count() + AddRange.
75 |                 var results = item.ValidationResults;
76 |                 if (results != null)
77 |                 {
78 |                     all.AddRange(results);
79 |                 }
80 |             }
81 |             return all;
82 |         }
83 |
84 |         /// <summary>
85 |         /// Appends a text report of the failed validations to a file.
86 |         /// </summary>
87 |         /// <param name="validateResults">The results to report; null is ignored.</param>
88 |         /// <param name="filePath">The file the report is appended to. It is not touched when there is nothing to report.</param>
89 |         public static void Save(this IEnumerable<ValidationResult> validateResults, String filePath)
90 |         {
91 |             if (validateResults == null)
92 |             {
93 |                 return;
94 |             }
95 |             var builder = new System.Text.StringBuilder();
96 |             foreach (var item in validateResults)
97 |             {
98 |                 var message = item.Message;
99 |                 if (string.IsNullOrEmpty(message))
100 |                 {
101 |                     continue;
102 |                 }
103 |                 // Plain Append: the separator has no placeholders, so AppendFormat was unnecessary.
104 |                 builder.Append("=======================\r\n");
105 |                 builder.AppendFormat("对页面 {0} 的验证结果:\r\n\r\n", item.Uri);
106 |                 builder.AppendFormat("验证 [{0}] 不通过:\r\n{1}\r\n", item.Name, message);
107 |             }
108 |             if (builder.Length == 0)
109 |             {
110 |                 return;
111 |             }
112 |             System.IO.File.AppendAllText(filePath, builder.ToString());
113 |         }
114 |     }
87 | }
88 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/InternalALinkValidation.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
8 |     /// <summary>
9 |     /// Checks that every site-internal a-link of a page points to a file that exists under a search directory.
10 |     /// </summary>
11 |     public class InternalALinkExistingValidation : IValidation
12 |     {
13 |         readonly String searchDirectory;
14 |         readonly IUriResourcesFromLocalFileSystemReslover fileReslover;
15 |
16 |         // File.Exists results keyed by local path. The original swallowed exceptions from Add,
17 |         // which points at concurrent callers, so every access is serialized with lock (files).
18 |         readonly Dictionary<string, bool> files = new Dictionary<string, bool>(5000);
19 |
20 |         /// <summary>
21 |         /// Initializes a new instance of <see cref="InternalALinkExistingValidation"/>.
22 |         /// </summary>
23 |         /// <param name="searchBaseDirectory">The output folder the linked files are searched in.</param>
24 |         /// <param name="resourceFileReslover">Converts URI paths to local paths; the default resolver is used when null.</param>
25 |         /// <exception cref="ArgumentNullException"><paramref name="searchBaseDirectory"/> is null.</exception>
26 |         public InternalALinkExistingValidation(String searchBaseDirectory, IUriResourcesFromLocalFileSystemReslover resourceFileReslover = null)
27 |         {
28 |             if (searchBaseDirectory == null)
29 |             {
30 |                 // Report the real parameter name (the original said "outputBaseDirectory").
31 |                 throw new ArgumentNullException("searchBaseDirectory");
32 |             }
33 |             this.searchDirectory = searchBaseDirectory;
34 |             this.fileReslover = resourceFileReslover ?? DefaultUriToLocalFilePathReslover.Instance;
35 |         }
36 |
37 |         /// <summary>
38 |         /// Looks at the href of every a element and reports the internal ones without a local file.
39 |         /// </summary>
40 |         /// <param name="document">The parsed page.</param>
41 |         /// <param name="status">The staticize context of the page; its Uri gives the directory page-relative links are resolved against.</param>
42 |         /// <returns>The concatenated error messages, or null when every checked link has a file.</returns>
43 |         public string Validate(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext status)
44 |         {
45 |             // Directory of the current page.
46 |             string documentDir = status.Uri.GetFileDirectory();
47 |             var errorMessage = new StringBuilder();
48 |             var nodes = document.DocumentNode.SelectNodes("//a");
49 |             if (nodes == null || nodes.Count == 0)
50 |             {
51 |                 return null;
52 |             }
53 |             foreach (var aNode in nodes)
54 |             {
55 |                 string href = aNode.GetAttributeValue("href", null);
56 |                 if (string.IsNullOrWhiteSpace(href) || href[0] == '#')
57 |                 {
58 |                     continue;
59 |                 }
60 |
61 |                 // Parse the href BEFORE prefixing it. The original prepended the page directory to
62 |                 // anything not starting with a separator, which turned "http://...", "mailto:..."
63 |                 // and "javascript:..." into bogus relative paths that were reported as missing files.
64 |                 Uri uri;
65 |                 if (!Uri.TryCreate(href, UriKind.RelativeOrAbsolute, out uri))
66 |                 {
67 |                     continue;
68 |                 }
69 |                 if (uri.IsAbsoluteUri)
70 |                 {
71 |                     // Links to another host and non-file schemes cannot name a local page.
72 |                     // A check against the site's own domain could be added here.
73 |                     if (!uri.IsFile || !string.IsNullOrEmpty(uri.Host))
74 |                     {
75 |                         continue;
76 |                     }
77 |                 }
78 |                 else if (!href[0].IsDirectorySeparator())
79 |                 {
80 |                     // The href is relative to the current page.
81 |                     if (!Uri.TryCreate(documentDir + href, UriKind.RelativeOrAbsolute, out uri))
82 |                     {
83 |                         continue;
84 |                     }
85 |                 }
86 |
87 |                 string local = fileReslover.ResloveLocalPath(uri);
88 |                 if (string.IsNullOrEmpty(local))
89 |                 {
90 |                     continue;
91 |                 }
92 |                 string localPath = System.IO.Path.Combine(searchDirectory, local);
93 |                 bool isExisting;
94 |                 lock (files)
95 |                 {
96 |                     if (!files.TryGetValue(localPath, out isExisting))
97 |                     {
98 |                         isExisting = System.IO.File.Exists(localPath);
99 |                         files[localPath] = isExisting;
100 |                     }
101 |                 }
102 |                 if (!isExisting)
103 |                 {
104 |                     errorMessage.AppendFormat("本地不存在链接 \"{0}\" 所指向的文件 \"{1}\"。", uri.ToString(), localPath);
105 |                 }
106 |             }
107 |             return errorMessage.Length == 0 ? null : errorMessage.ToString();
108 |         }
109 |
110 |         /// <summary>
111 |         /// Display name of this validation.
112 |         /// </summary>
113 |         public string Name { get { return "检查页面HTML中的站内A链接指向的页面是在本地存在对应文件。"; } }
114 |
115 |         /// <summary>
116 |         /// Category of this validation.
117 |         /// </summary>
118 |         public ValidationType Type { get { return ValidationType.Link; } }
119 |
120 |         /// <summary>
121 |         /// Returns <see cref="Name"/>.
122 |         /// </summary>
123 |         public override string ToString() { return Name; }
124 |     }
98 | }
99 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/XPathValidation.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 | using Rhythm.Staticize;
7 |
8 | namespace Rhythm.Staticize
9 | {
10 |     /// <summary>
11 |     /// Verifies that selected DOM elements of an HTML document sit at the expected position. Positions
12 |     /// are compared by XPath; the expected XPath of each element is read from a reference template.
13 |     /// </summary>
14 |     public class XPathValidation : IValidation
15 |     {
16 |         // Expected XPath per element id, captured from the template by Init.
17 |         Dictionary<string, string> elementXPath;
18 |
19 |         // Reference template the expected XPaths are taken from.
20 |         HtmlAgilityPack.HtmlDocument truthDocument;
21 |
22 |         /// <summary>
23 |         /// Initializes a new instance of <see cref="XPathValidation"/> from a template file.
24 |         /// </summary>
25 |         /// <param name="templateFile">Path of the reference template file.</param>
26 |         /// <param name="elementIds">Ids of the page elements whose position is checked.</param>
27 |         /// <exception cref="ArgumentNullException"><paramref name="templateFile"/> or <paramref name="elementIds"/> is null.</exception>
28 |         public XPathValidation(String templateFile, params String[] elementIds)
29 |         {
30 |             if (templateFile == null)
31 |             {
32 |                 // Report the real parameter name (the original said "truthDocumentFile").
33 |                 throw new ArgumentNullException("templateFile");
34 |             }
35 |             if (elementIds == null)
36 |             {
37 |                 throw new ArgumentNullException("elementIds");
38 |             }
39 |             this.truthDocument = new HtmlAgilityPack.HtmlDocument();
40 |             this.truthDocument.Load(templateFile);
41 |             Init(elementIds);
42 |         }
43 |
44 |         /// <summary>
45 |         /// Initializes a new instance of <see cref="XPathValidation"/> from an already loaded template.
46 |         /// </summary>
47 |         /// <param name="templateDocument">The reference template document.</param>
48 |         /// <param name="elementIds">Ids of the page elements whose position is checked.</param>
49 |         /// <exception cref="ArgumentNullException"><paramref name="templateDocument"/> or <paramref name="elementIds"/> is null.</exception>
50 |         public XPathValidation(HtmlAgilityPack.HtmlDocument templateDocument, params String[] elementIds)
51 |         {
52 |             if (templateDocument == null)
53 |             {
54 |                 // Report the real parameter name (the original said "truthDocument").
55 |                 throw new ArgumentNullException("templateDocument");
56 |             }
57 |             if (elementIds == null)
58 |             {
59 |                 // Same guard as the file-based constructor; without it Init failed with a
60 |                 // NullReferenceException on elementIds.Length.
61 |                 throw new ArgumentNullException("elementIds");
62 |             }
63 |             this.truthDocument = templateDocument;
64 |             Init(elementIds);
65 |         }
66 |
67 |         // Builds the id -> expected XPath table from the template.
68 |         void Init(String[] elementIds)
69 |         {
70 |             elementXPath = new Dictionary<string, string>(elementIds.Length);
71 |             foreach (var id in elementIds)
72 |             {
73 |                 AddXPathCheck(id);
74 |             }
75 |         }
76 |
77 |         // Registers one id. Ids that the template does not contain, and ids that were already
78 |         // registered, are skipped silently.
79 |         void AddXPathCheck(String id)
80 |         {
81 |             var element = truthDocument.GetElementbyId(id);
82 |             if (element == null)
83 |             {
84 |                 return;
85 |             }
86 |             if (elementXPath.ContainsKey(id))
87 |             {
88 |                 return;
89 |             }
90 |             elementXPath.Add(id, element.XPath);
91 |         }
92 |
93 |         /// <summary>
94 |         /// Compares the XPath of every registered element in <paramref name="document"/> with the template.
95 |         /// </summary>
96 |         /// <param name="document">The page to check.</param>
97 |         /// <param name="status">The staticize context of the page; not used by this validation.</param>
98 |         /// <returns>The concatenated error messages, or null when every element is present at the expected XPath.</returns>
99 |         public string Validate(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext status)
100 |         {
101 |             var errorMessageBuilder = new StringBuilder();
102 |             // Enumerate key/value pairs directly instead of Keys + indexer (one lookup per id).
103 |             foreach (var expected in this.elementXPath)
104 |             {
105 |                 String id = expected.Key;
106 |                 String truthXPath = expected.Value;
107 |                 var element = document.GetElementbyId(id);
108 |                 if (element == null)
109 |                 {
110 |                     errorMessageBuilder.AppendFormat("\r\n元素 \"{0}\" 在文档中不存在。", id);
111 |                     continue;
112 |                 }
113 |                 if (element.XPath != truthXPath)
114 |                 {
115 |                     errorMessageBuilder.AppendFormat("\r\n元素 \"{0}\" XPath 不匹配,应为\"{1}\",但实际为\"{2}\"。\r\n行号:{3}\r\n源HTML:\r\n{4}\r\n", id, truthXPath, element.XPath, element.Line.ToString(), element.OuterHtml);
116 |                 }
117 |             }
118 |             return errorMessageBuilder.Length == 0 ? null : errorMessageBuilder.ToString();
119 |         }
120 |
121 |         /// <summary>
122 |         /// Display name of this validation.
123 |         /// </summary>
124 |         public string Name { get { return "页面元素XPath与模板XPath是否相符。"; } }
125 |
126 |         /// <summary>
127 |         /// Category of this validation.
128 |         /// </summary>
129 |         public ValidationType Type { get { return ValidationType.Tag; } }
130 |
131 |         /// <summary>
132 |         /// Returns <see cref="Name"/>.
133 |         /// </summary>
134 |         public override string ToString() { return Name; }
135 |     }
107 | }
108 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Rhythm.Staticize.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | AnyCPU
7 | {C1EBD5EC-6861-4C79-A954-6B58EC944FF8}
8 | Library
9 | Properties
10 | Rhythm.Staticize
11 | Rhythm.Staticize
12 | v4.0
13 | 512
14 | 12.0.0
15 | 2.0
16 | SAK
17 | SAK
18 | SAK
19 | SAK
20 |
21 |
22 |
23 | true
24 | full
25 | false
26 | bin\
27 | DEBUG;TRACE
28 | prompt
29 | 4
30 |
31 |
32 | pdbonly
33 | true
34 | bin\
35 | TRACE
36 | prompt
37 | 4
38 |
39 |
40 |
41 | ..\References\HtmlAgilityPack\HtmlAgilityPack.dll
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
88 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/StaticizeStepStatus.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Rhythm.Staticize
7 | {
8 | ///
9 | /// 用于静态化状态跟踪。
10 | /// 支持异步跟踪静态化状态
11 | ///
12 | public class StaticizeStepStatus : IEnumerable
13 | {
14 | internal int pageCount;
15 |
16 | ///
17 | /// 由于在内部生成页面是并行化的,因此此字段是 volatile 字段。
18 | ///
19 | volatile int volatileGeneratedPageCount;
20 | int generatedPageCount;
21 | internal int validatedPageCount;
22 |
23 | Dictionary contexts;
24 |
25 | /// <summary>
26 | /// Initializes a new instance of StaticizeStepStatus.
27 | /// </summary>
28 | public StaticizeStepStatus()
29 | {
30 | this.Errors = new List(); // starts with an empty error list
31 | ValidationErrors = new List(); // starts with an empty validation-error list
32 | }
33 |
34 |         // Records that one more page has been generated. Pages are generated in parallel, and a
35 |         // plain ++ on a volatile field is a non-atomic read-modify-write that can lose updates,
36 |         // so the counter is advanced with Interlocked. (Passing the volatile field by ref raises
37 |         // compiler warning CS0420, which is harmless for Interlocked calls.)
38 |         internal void AddGeneratedPageCount()
39 |         {
40 |             int current = System.Threading.Interlocked.Increment(ref volatileGeneratedPageCount);
41 |
42 |             // Publish the total to the field the GeneratedPageCount property reads, without letting
43 |             // a slower thread overwrite a larger value that another thread has already stored.
44 |             int seen = generatedPageCount;
45 |             while (seen < current)
46 |             {
47 |                 int previous = System.Threading.Interlocked.CompareExchange(ref generatedPageCount, current, seen);
48 |                 if (previous == seen)
49 |                 {
50 |                     break;
51 |                 }
52 |                 seen = previous;
53 |             }
54 |         }
39 |
40 |         internal void AddValidatedPageCount()
41 |         {
42 |             System.Threading.Interlocked.Increment(ref validatedPageCount); // atomic: concurrent callers cannot lose an update
43 |         }
44 |
45 | //public StaticizeStepStatus(string id)
46 | //{
47 | // if (id == null)
48 | // {
49 | // throw new ArgumentNullException("id");
50 | // }
51 | // this.Id = id;
52 | //}
53 |
54 | //public string Id { get; internal set; }
55 | internal void Init(HtmlStaticizeContext[] entries) // indexes the page contexts by URI and exposes them through Urls and Items
56 | {
57 | contexts = new Dictionary(entries.Length);
58 | for (int j = 0; j < entries.Length; j++)
59 | {
60 | contexts.Add(entries[j].uri, entries[j]); // Dictionary.Add throws ArgumentException when two entries share a URI
61 | }
62 | Urls = contexts.Keys; // live view over the dictionary keys
63 | Items = entries; // the caller's array itself, not a copy
64 | }
65 |
66 | /// <summary>
67 | /// Gets the validation error information.
68 | /// </summary>
69 | public IList ValidationErrors { get; private set; }
70 |
71 | public HtmlStaticizeContext this[Uri uri] { get { return contexts[uri]; } } // looks a page context up by its URI
72 |
73 | public ICollection Urls { get; private set; } // assigned by Init from the keys of the context map
74 |
75 | /// <summary>
76 | /// Gets the staticize contexts of all pages.
77 | /// </summary>
78 | public ICollection Items { get; private set; }
79 |
80 | /// <summary>
81 | /// Gets the exception information recorded during the staticize run.
82 | /// </summary>
83 | public IList Errors { get; internal set; }
84 |
85 | public IEnumerator GetEnumerator() // enumerates the contexts registered by Init
86 | {
87 | return contexts.Values.GetEnumerator();
88 | }
89 |
90 | System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() // non-generic counterpart over the same values
91 | {
92 | return contexts.Values.GetEnumerator();
93 | }
94 |
95 |         StaticizeStep step;
96 |         /// <summary>
97 |         /// Gets the step the staticize run is currently in; storing a different step raises StepChanged.
98 |         /// </summary>
99 |         public StaticizeStep Step
100 |         {
101 |             get { return step; }
102 |             internal set
103 |             {
104 |                 bool changed = step != value;
105 |                 step = value;
106 |                 if (!changed)
107 |                 {
108 |                     return;
109 |                 }
110 |                 var handler = StepChanged;
111 |                 if (handler != null)
112 |                 {
113 |                     handler(this, new StaticizeStepChangedEventArgs { Step = step, });
114 |                 }
115 |             }
116 |         }
117 |
118 | /// <summary>
119 | /// Raised when the staticize run moves to a different step.
120 | /// </summary>
121 | [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1009:DeclareEventHandlersCorrectly")]
122 | public event StaticizeStepChangedEventHandler StepChanged;
123 |
124 | /// <summary>
125 | /// Gets the current total number of pages.
126 | /// </summary>
127 | public int PageCount { get { return pageCount; } }
128 |
129 | /// <summary>
130 | /// Gets the number of pages generated so far.
131 | /// </summary>
132 | public int GeneratedPageCount { get { return generatedPageCount; } }
133 |
134 | /// <summary>
135 | /// Gets the number of pages validated so far.
136 | /// </summary>
137 | public int ValidatedPageCount { get { return validatedPageCount; } }
138 | }
139 |
140 | }
141 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/ReferenceResourcesExistingValidation.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 |     /// <summary>
10 |     /// Checks that the resource files referenced by a page exist in the file system. It takes the CSS,
11 |     /// JavaScript and image references recorded for the page and looks each one up under a base directory.
12 |     /// </summary>
13 |     public class ReferenceResourcesExistingValidation : IValidation
14 |     {
15 |         // File.Exists results keyed by physical path. The original wrapped every access in an empty
16 |         // catch as a "concurrent modification patch"; access is now serialized with lock instead.
17 |         readonly Dictionary<string, bool> exisitingFiles = new Dictionary<string, bool>(1000);
18 |         readonly String outputDir;
19 |         readonly IUriResourcesFromLocalFileSystemReslover fileReslover;
20 |
21 |         /// <summary>
22 |         /// Initializes a new instance of <see cref="ReferenceResourcesExistingValidation"/>.
23 |         /// </summary>
24 |         /// <param name="resourceBaseDir">Base directory the resources were saved to; files are searched relative to it.</param>
25 |         /// <param name="resourceFileReslover">Converts URI paths to local file system paths; the default resolver is used when null.</param>
26 |         public ReferenceResourcesExistingValidation(String resourceBaseDir, IUriResourcesFromLocalFileSystemReslover resourceFileReslover = null)
27 |         {
28 |             this.outputDir = resourceBaseDir;
29 |             this.fileReslover = resourceFileReslover ?? DefaultUriToLocalFilePathReslover.Instance;
30 |         }
31 |
32 |         /// <summary>
33 |         /// Validates the CSS, JavaScript and image references stored in <paramref name="status"/>.
34 |         /// </summary>
35 |         /// <param name="document">The parsed page; not read by this validation.</param>
36 |         /// <param name="status">The staticize context whose Resources are checked.</param>
37 |         /// <returns>The three partial reports joined by line breaks, or null when no file is missing.</returns>
38 |         public string Validate(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext status)
39 |         {
40 |             var resources = status.Resources;
41 |             String resultCss = Validate(resources.ReferenceCsses, status);
42 |             String resultJs = Validate(resources.ReferenceJavascripts, status);
43 |             String resultImage = Validate(resources.ReferenceImages, status);
44 |             bool allPassed =
45 |                 String.IsNullOrEmpty(resultCss) &&
46 |                 String.IsNullOrEmpty(resultImage) &&
47 |                 String.IsNullOrEmpty(resultJs);
48 |             return allPassed ? null : String.Format("{0}\r\n{1}\r\n{2}", resultCss, resultJs, resultImage);
49 |         }
50 |
51 |         // Checks one list of resource URIs. Returns null when the list is null or nothing is missing,
52 |         // otherwise one message line per missing file.
53 |         string Validate(IList<Uri> list, HtmlStaticizeContext status)
54 |         {
55 |             if (list == null)
56 |             {
57 |                 return null;
58 |             }
59 |             var errorMessage = new StringBuilder();
60 |             foreach (var uri in list)
61 |             {
62 |                 String fileName = fileReslover.ResloveLocalPath(uri);
63 |                 if (String.IsNullOrEmpty(fileName))
64 |                 {
65 |                     continue;
66 |                 }
67 |                 String physicalFilePath = System.IO.Path.Combine(outputDir, fileName);
68 |                 if (IsFilePresent(physicalFilePath))
69 |                 {
70 |                     continue;
71 |                 }
72 |                 // NOTE(review): if NotExistsFiles is a dictionary, Add throws when the same URI is
73 |                 // reported twice for one page - confirm against its declaration.
74 |                 status.Resources.NotExistsFiles.Add(uri, physicalFilePath);
75 |                 errorMessage.AppendFormat("资源 \"{0}\" 未能在本地预期的路径 \"{1}\" 中找到。\r\n", uri.ToString(), physicalFilePath);
76 |                 var ex = status.GenerationError;
77 |                 if (ex != null)
78 |                 {
79 |                     errorMessage.AppendLine("这可能是由于请求文件时发生异常造成的,以下是异常信息:");
80 |                     errorMessage.AppendFormat("{0}:\r\n{1}\r\n\r\n", ex.Message, ex.ToString());
81 |                 }
82 |             }
83 |             return errorMessage.Length == 0 ? null : errorMessage.ToString();
84 |         }
85 |
86 |         // Returns the cached existence flag for a path, probing the disk only the first time the
87 |         // path is seen. A path found missing stays cached as missing, as in the original. The
88 |         // indexer assignment replaces Dictionary.Add, whose duplicate-key exception used to be
89 |         // thrown and swallowed for every already-cached missing file.
90 |         bool IsFilePresent(string physicalFilePath)
91 |         {
92 |             lock (exisitingFiles)
93 |             {
94 |                 bool fileExists;
95 |                 if (!exisitingFiles.TryGetValue(physicalFilePath, out fileExists))
96 |                 {
97 |                     fileExists = System.IO.File.Exists(physicalFilePath);
98 |                     exisitingFiles[physicalFilePath] = fileExists;
99 |                 }
100 |                 return fileExists;
101 |             }
102 |         }
103 |
104 |         /// <summary>
105 |         /// Display name of this validation.
106 |         /// </summary>
107 |         public string Name { get { return "网页引用的资源文件是否存在"; } }
108 |
109 |         /// <summary>
110 |         /// Category of this validation.
111 |         /// </summary>
112 |         public ValidationType Type { get { return ValidationType.Resource; } }
113 |     }
116 | }
117 |
--------------------------------------------------------------------------------
/Rhythm.Staticize.Tests/Rhythm.Staticize.Tests.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | AnyCPU
6 | {BB81F20F-68E2-415B-B885-30DEA4B7CD4D}
7 | Library
8 | Properties
9 | Rhythm.Staticize
10 | Rhythm.Staticize.Tests
11 | v4.0
12 | 512
13 | {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
14 | 10.0
15 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)
16 | $(ProgramFiles)\Common Files\microsoft shared\VSTT\$(VisualStudioVersion)\UITestExtensionPackages
17 | False
18 | UnitTest
19 | SAK
20 | SAK
21 | SAK
22 | SAK
23 |
24 |
25 | true
26 | full
27 | false
28 | bin\Debug\
29 | DEBUG;TRACE
30 | prompt
31 | 4
32 |
33 |
34 | pdbonly
35 | true
36 | bin\Release\
37 | TRACE
38 | prompt
39 | 4
40 |
41 |
42 |
43 |
44 | 3.5
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 | {c1ebd5ec-6861-4c79-a954-6b58ec944ff8}
67 | Rhythm.Staticize
68 |
69 |
70 |
71 |
72 |
73 |
74 | False
75 |
76 |
77 | False
78 |
79 |
80 | False
81 |
82 |
83 | False
84 |
85 |
86 |
87 |
88 |
89 |
90 |
97 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Validation/ValidationProjection.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 |     /// <summary>
10 |     /// Factory methods for the commonly used HTML validations.
11 |     /// </summary>
12 |     public static class ValidationProjection
13 |     {
14 |         /// <summary>
15 |         /// Validates that the HTML contains the given text.
16 |         /// </summary>
17 |         /// <param name="value">The text that must occur in the InnerHtml of the document node.</param>
18 |         /// <param name="errorMessage">Message reported on failure; a default message is built when null.</param>
19 |         /// <returns>The validation.</returns>
20 |         /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
21 |         public static IValidation Contains(String value, String errorMessage = null)
22 |         {
23 |             if (value == null)
24 |             {
25 |                 // Fail when the rule is built; otherwise String.Contains throws the same
26 |                 // exception later, once for every validated page.
27 |                 throw new ArgumentNullException("value");
28 |             }
29 |             return new ValidationDelegateTaken("页面是否存在指定内容", ValidationType.Content, (doc) =>
30 |             {
31 |                 return doc.DocumentNode.InnerHtml.Contains(value);
32 |             }, errorMessage ?? string.Format("页面中不存在预期的内容:\"{0}\"。", value));
33 |         }
34 |
35 |         /// <summary>
36 |         /// Validates that the page title equals the expected title.
37 |         /// </summary>
38 |         /// <param name="excepted">The expected title; compared exactly with the InnerHtml of html/head/title.</param>
39 |         /// <param name="errorMessage">Message reported on failure; a default message is built when null.</param>
40 |         /// <returns>The validation; it fails when the page has no title element.</returns>
41 |         public static IValidation TitleEquals(String excepted, String errorMessage = null)
42 |         {
43 |             return new ValidationDelegateTaken("网页标题", ValidationType.Content, (doc) =>
44 |             {
45 |                 var titleNode = doc.DocumentNode.SelectSingleNode(@"html/head/title");
46 |                 return titleNode != null && excepted == titleNode.InnerHtml;
47 |             }, errorMessage ?? string.Format("预期的标题 \"{0}\"。", excepted));
48 |         }
49 |
50 |         /// <summary>
51 |         /// Validates that the HTML DOM contains an element with the given id.
52 |         /// </summary>
53 |         /// <param name="elementId">The id of the element to look for.</param>
54 |         /// <param name="errorMessage">Message reported on failure; a default message is built when null.</param>
55 |         /// <returns>The validation.</returns>
56 |         public static IValidation HasElement(String elementId, String errorMessage = null)
57 |         {
58 |             return new ValidationDelegateTaken("页面元素是否存在", ValidationType.Tag, (doc) =>
59 |             {
60 |                 return doc.GetElementbyId(elementId) != null;
61 |             }, errorMessage ?? string.Format("页面中不存在元素\"{0}\"。", elementId));
62 |         }
63 |
64 |         /// <summary>
65 |         /// Validates that the HTML document links the given CSS file.
66 |         /// </summary>
67 |         /// <param name="cssHref">The exact href value expected on a link element with rel='stylesheet'.</param>
68 |         /// <param name="errorMessage">Message reported on failure; a default message is built when null.</param>
69 |         /// <returns>The validation.</returns>
70 |         public static IValidation HasCssLink(String cssHref, String errorMessage = null)
71 |         {
72 |             return new ValidationDelegateTaken("CSS 标签是否存在", ValidationType.Tag, (doc) =>
73 |             {
74 |                 var nodes = doc.DocumentNode.SelectNodes(@"//link[@rel='stylesheet']");
75 |                 // Any stops at the first match; Where(...).Count() > 0 walked every node.
76 |                 return nodes != null && nodes.Any(n => cssHref == n.GetAttributeValue("href", null));
77 |             }, errorMessage ?? string.Format("CSS 标签 \"{0}\" 不存在。", cssHref));
78 |         }
79 |
80 |         /// <summary>
81 |         /// Validates that the HTML document references the given JavaScript file.
82 |         /// </summary>
83 |         /// <param name="jsSrc">The exact src value expected on a script element.</param>
84 |         /// <param name="errorMessage">Message reported on failure; a default message is built when null.</param>
85 |         /// <returns>The validation.</returns>
86 |         public static IValidation HasScriptLink(String jsSrc, String errorMessage = null)
87 |         {
88 |             return new ValidationDelegateTaken("JS 标签是否存在", ValidationType.Tag, (doc) =>
89 |             {
90 |                 var nodes = doc.DocumentNode.SelectNodes(@"//script[@src]");
91 |                 // Any stops at the first match; Where(...).Count() > 0 walked every node.
92 |                 return nodes != null && nodes.Any(n => jsSrc == n.GetAttributeValue("src", null));
93 |             }, errorMessage ?? string.Format("JS 标签 \"{0}\" 不存在。", jsSrc));
94 |         }
95 |
96 |         /// <summary>
97 |         /// Validates that the resources referenced by the HTML document exist.
98 |         /// </summary>
99 |         /// <param name="resourceBaseDirectory">Base directory the resources were saved to; files are searched relative to it.</param>
100 |         /// <param name="errorMessage">
101 |         /// Currently ignored: the created validation builds its own messages. The parameter is kept so
102 |         /// existing callers still compile.
103 |         /// </param>
104 |         /// <returns>The validation.</returns>
105 |         public static IValidation ResourcesExisting(String resourceBaseDirectory, String errorMessage = null)
106 |         {
107 |             return new ReferenceResourcesExistingValidation(resourceBaseDirectory);
108 |         }
109 |
110 |         /// <summary>
111 |         /// Validates that the given elements have the same XPath as in a template file.
112 |         /// </summary>
113 |         /// <param name="templateFile">Path of the reference template file.</param>
114 |         /// <param name="elementIds">Ids of the elements to compare.</param>
115 |         /// <returns>The validation.</returns>
116 |         public static IValidation XPathEquals(String templateFile, params String[] elementIds)
117 |         {
118 |             return new XPathValidation(templateFile, elementIds);
119 |         }
120 |
121 |         /// <summary>
122 |         /// Validates that site-internal a-links point to files that exist locally.
123 |         /// </summary>
124 |         /// <param name="searchBaseDirectory">Directory the linked files are searched in.</param>
125 |         /// <returns>The validation.</returns>
126 |         public static IValidation InternalALinkExisting(String searchBaseDirectory)
127 |         {
128 |             return new InternalALinkExistingValidation(searchBaseDirectory);
129 |         }
130 |     }
128 | }
129 |
--------------------------------------------------------------------------------
/Rhythm.Staticize.Tests/StaticizeTest.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using Microsoft.VisualStudio.TestTools.UnitTesting;
3 | using System.Linq;
4 | using System.Collections.Generic;
5 | using Rhythm;
6 | using Rhythm.Staticize;
7 |
8 | namespace Rhythm.Staticize
9 | {
10 | [TestClass]
11 | public class StaticizeTest
12 | {
13 | ///
14 | /// 初始化代码
15 | ///
16 | [TestInitialize()]
17 | public void Initialize() { }
18 |
19 | ///
20 | /// 资源清理代码
21 | ///
22 | [TestCleanup]
23 | public void Cleanup() { }
24 |
25 |
26 | /// <summary>
27 | /// End-to-end run: downloads a fixed list of public pages into a new batch folder under the application
28 | /// base directory, runs the image download behavior and saves the validation results next to the pages.
29 | /// </summary>
30 | [TestMethod]
31 | public void StaticizeTest1()
32 | {
33 |     // Batch id; it names the output folder of this run.
34 |     String batchId = CreateBatchId();
35 |
36 |     // Output folder.
37 |     string outputDirectory = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, batchId);
38 |     System.IO.Directory.CreateDirectory(outputDirectory);
39 |
40 |     List<KeyValuePair<Uri, String>> pages = new List<KeyValuePair<Uri, String>>(10000);
41 |
42 |     // The URLs below are generated as static HTML files; the files are written under bin.
43 |     var urls = new[] {
44 |         "http://www.zhihu.com/question/25519625",
45 |         "http://www.zhihu.com/question/27232313",
46 |         "http://www.zhihu.com/question/31291872",
47 |         "http://www.zhihu.com/question/31293043",
48 |         "http://www.zhihu.com/question/31318753",
49 |         "http://cn.bing.com/",
50 |         "http://36kr.com/"
51 |     };
52 |
53 |     // Note: images, CSS and JS referenced by a relative path (without http://host/) are downloaded by Staticize and placed in the folder,
54 |     // but absolute references such as http://img3.douban.com/misc/mixed_static/7011201580a8cbed.css are not downloaded.
55 |     // Start at index 0 so that the first URL is staticized as well.
56 |     for (int i = 0; i < urls.Length; i++)
57 |     {
58 |         string outputFile = System.IO.Path.Combine(outputDirectory, string.Concat("zihu-", i.ToString(), ".html"));
59 |         pages.Add(new KeyValuePair<Uri, String>(new Uri(urls[i]), outputFile));
60 |     }
61 |
62 |     CreateDirectory(pages, outputDirectory);
63 |
64 |     Staticizer staticize = new Staticizer();
65 |     staticize.AddBehavior(new ImageResourcesDownloadBehavior(outputDirectory));
66 |
67 |     //staticize.AddValidation(
68 |     // // verify that the CSS files exist
69 |     // ValidationProjection.HasCssLink("/resources/css/jquery-ui-themes.css"),
70 |     // ValidationProjection.HasCssLink("/resources/css/axure_rp_page.css"),
71 |     // // verify that the main DOM element (id) of the page exists
72 |     // ValidationProjection.HasElement("main_container"),
73 |     // // verify that the JS files exist
74 |     // ValidationProjection.HasScriptLink("/data/sitemap.js"),
75 |     // ValidationProjection.HasScriptLink("/resources/scripts/jquery-1.7.1.min.js"),
76 |     // ValidationProjection.HasScriptLink("/resources/scripts/axutils.js"),
77 |     // ValidationProjection.HasScriptLink("/resources/scripts/jquery-ui-1.8.10.custom.min.js"),
78 |     // ValidationProjection.HasScriptLink("/resources/scripts/axurerp_beforepagescript.js"),
79 |     // ValidationProjection.HasScriptLink("/resources/scripts/messagecenter.js")
80 |     // );
81 |
82 |     //staticize.AddValidation(
83 |     // // verify that the resources referenced by the HTML document exist.
84 |     // ValidationProjection.ResourcesExisting(outputDirectory),
85 |     // //XPath
86 |     // ValidationProjection.XPathEquals("main_template.html", "main_container"),
87 |     // ValidationProjection.InternalALinkExisting(outputDirectory)
88 |     // );
89 |
90 |     var stepTaken = new StaticizeStepStatus();
91 |
92 |     var staticizeResults = staticize.Staticize(pages, stepTaken);
93 |
94 |     var validationResults = staticizeResults.GetValidationResults();
95 |     validationResults.Save(System.IO.Path.Combine(outputDirectory, "validationResults.txt"));
96 | }
97 |
98 | /// <summary>Pairs the Uri parsed from <paramref name="address"/> with the local path that <c>DefaultUriToLocalFilePathReslover</c> resolves for it, combined with <paramref name="outputDirectory"/>.</summary>
99 | public KeyValuePair<Uri, String> CreateUri(string address, String outputDirectory)
100 | {
101 |     Uri pageUri = new Uri(address);
102 |     return new KeyValuePair<Uri, String>(pageUri, System.IO.Path.Combine(outputDirectory, DefaultUriToLocalFilePathReslover.Instance.ResloveLocalPath(pageUri)));
103 | }
104 |
105 | /// <summary>Creates, under <paramref name="outputDirectory"/>, the directory obtained from the Uri (Key) of every entry in <paramref name="pages"/>.</summary>
106 | private void CreateDirectory(IEnumerable<KeyValuePair<Uri, String>> pages, string outputDirectory)
107 | {
108 |     foreach (KeyValuePair<Uri, String> page in pages)
109 |     {
110 |         string relativeDirectory = page.Key.GetFileDirectory().RemoveLastDirectorySeparator();
111 |         System.IO.Directory.CreateDirectory(System.IO.Path.Combine(outputDirectory, relativeDirectory));
112 |     }
113 | }
114 |
115 | /// <summary>Builds a batch id "yyyyMMddHHmmss-xxxxxx": local time plus the first six hex digits of a new GUID.</summary>
116 | /// <remarks>The invariant culture keeps the timestamp Gregorian on every machine; the "N" GUID format contains no dashes, so no character replacement is needed.</remarks>
117 | public static string CreateBatchId()
118 | {   return String.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:yyyyMMddHHmmss}-{1}", System.DateTime.Now, Guid.NewGuid().ToString("N").Substring(0, 6));
119 | }
120 |
121 |
122 |
123 | }
124 | }
125 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/ResourcesDownload/ResourcesDownloadBaseBehavior.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Rhythm.Staticize
8 | {
9 | /// <summary>
10 | /// Provides download support for the resources referenced by a crawled page.
11 | /// </summary>
12 | public class ResourcesDownloadBaseBehavior : IBehavior
13 | {
14 | String resourcesNodeSelectPath; // XPath expression passed to SelectNodes to find the nodes that reference resources
15 | String outputDirectory; // base directory that the resolved local resource paths are combined with
16 | IUriResourcesFromLocalFileSystemReslover fileReslover; // resolves a resource Uri to a local path; also asked to copy the resource locally before a download is attempted
17 |
18 | /// <summary>Initializes a new instance of <see cref="ResourcesDownloadBaseBehavior"/>.</summary>
19 | /// <param name="outputBaseDirectory">Output folder for the referenced resources.</param>
20 | /// <param name="resourcesHtmlNodeSelectPath">XPath expression selecting the HTML tags that reference resources.</param>
21 | /// <param name="resourceFileReslover">Converts Uri paths to local paths; when null, a new <c>DefaultUriToLocalFilePathReslover</c> is used.</param>
22 | /// <exception cref="ArgumentNullException"><paramref name="outputBaseDirectory"/> or <paramref name="resourcesHtmlNodeSelectPath"/> is null.</exception>
23 | public ResourcesDownloadBaseBehavior(String outputBaseDirectory, String resourcesHtmlNodeSelectPath, IUriResourcesFromLocalFileSystemReslover resourceFileReslover = null)
24 | {
25 |     // The fields are assigned first and the arguments validated afterwards.
26 |     outputDirectory = outputBaseDirectory;
27 |     resourcesNodeSelectPath = resourcesHtmlNodeSelectPath;
28 |     fileReslover = resourceFileReslover != null ? resourceFileReslover : new DefaultUriToLocalFilePathReslover();
29 |     if (null == outputBaseDirectory)
30 |     {
31 |         throw new ArgumentNullException("outputBaseDirectory");
32 |     }
33 |     if (null == resourcesHtmlNodeSelectPath)
34 |     {
35 |         throw new ArgumentNullException("resourcesHtmlNodeSelectPath");
36 |     }
37 | }
38 |
39 | // Output files already claimed for download, and directories already created, by this instance.
40 | readonly HashSet<String> files = new HashSet<String>();
41 | readonly HashSet<String> directories = new HashSet<String>();
42 | // Guards both sets, which are not thread-safe, in case Process is called from several threads.
43 | readonly Object syncRoot = new Object();
44 | /// <summary>
45 | /// Downloads the resources referenced by the nodes that match the configured XPath into the output directory.
46 | /// </summary>
47 | /// <param name="document">The loaded HTML document to scan.</param>
48 | /// <param name="context">Context of the page; supplies the page Uri and collects the download errors.</param>
49 | public void Process(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext context)
50 | {
51 |     Uri documentUri = context.Uri;
52 |     String baseUrl = documentUri.GetParent();
53 |     var nodes = document.DocumentNode.SelectNodes(resourcesNodeSelectPath);
54 |     if (nodes == null || nodes.Count == 0)
55 |     {
56 |         return;
57 |     }
58 |     var srcAttributes = GetSrcAttributes(nodes);
59 |     if (srcAttributes == null || srcAttributes.Length == 0)
60 |     {
61 |         return;
62 |     }
63 |     var parsedSrcUris = ParseResourcesUris(documentUri, baseUrl, srcAttributes);
64 |     if (parsedSrcUris == null || parsedSrcUris.Length == 0)
65 |     {
66 |         return;
67 |     }
68 |
69 |     OnResourceParsed(parsedSrcUris, context);
70 |
71 |     foreach (Uri uri in parsedSrcUris)
72 |     {
73 |         String localPath = fileReslover.ResloveLocalPath(uri);
74 |         if (String.IsNullOrEmpty(localPath))
75 |         {
76 |             continue;
77 |         }
78 |         // GetDirectoryName returns null for a root path, which Path.Combine would reject.
79 |         String localDirectory = System.IO.Path.Combine(outputDirectory, System.IO.Path.GetDirectoryName(localPath) ?? String.Empty);
80 |         String saveFile = System.IO.Path.Combine(outputDirectory, localPath);
81 |         lock (syncRoot)
82 |         {
83 |             // Create the target directory once; it is remembered only after it was created successfully.
84 |             if (!directories.Contains(localDirectory))
85 |             {
86 |                 System.IO.Directory.CreateDirectory(localDirectory);
87 |                 directories.Add(localDirectory);
88 |             }
89 |             // Skip a file that was already claimed, so the same file is never written twice.
90 |             if (!files.Add(saveFile))
91 |             {
92 |                 continue;
93 |             }
94 |         }
95 |         if (System.IO.File.Exists(saveFile))
96 |         {
97 |             continue;
98 |         }
99 |         if (this.fileReslover.TryCopyFromLocal(uri, saveFile))
100 |         {
101 |             continue;
102 |         }
103 |         using (System.Net.WebClient wc = new System.Net.WebClient())
104 |         {
105 |             try
106 |             {
107 |                 wc.DownloadFile(uri, saveFile);
108 |             }
109 |             catch (Exception ex)
110 |             {
111 |                 // A failed WebClient download can still leave an empty local file behind; remove it (best effort).
112 |                 try
113 |                 {
114 |                     System.IO.File.Delete(saveFile);
115 |                 }
116 |                 catch (Exception deleteError)
117 |                 {
118 |                     context.Errors.Add(deleteError);
119 |                 }
120 |                 context.Errors.Add(new ResourcesDownloadException(String.Format(@"下载资源 ""{0}"" 时发生异常。", uri.ToString()), ex)
121 |                 {
122 |                     Url = uri,
123 |                 });
124 |             }
125 |         }
126 |     }
127 | }
128 |
129 | /////
130 | ///// 获取页面的Uri所在的目录名称。(仅支持页面)
131 | /////
132 | /////
133 | ///// 上一级的URI Address。
134 | //static string GetParent(Uri uri)
135 | //{
136 | // String documentUriString = uri.ToString();
137 | // int lastSlash = documentUriString.LastIndexOf('/');
138 | // String documenBaseDir = documentUriString.Substring(0, lastSlash);
139 | // if (lastSlash == -1)
140 | // {
141 | // documenBaseDir = documentUriString;
142 | // }
143 | // else
144 | // {
145 | // //特殊处理
146 | // //如果最后还是“/”(有的URL不标准,路径中有两个“//”,如http://localhost:90/Admin/Blogs///17.html)
147 | // while (
148 | // documenBaseDir[documenBaseDir.Length - 1] == '/' ||
149 | // documenBaseDir[documenBaseDir.Length - 1] == '\\'
150 | // )
151 | // {
152 | // documenBaseDir = documenBaseDir.Substring(0, documenBaseDir.Length - 1);
153 | // }
154 | // }
155 | // return documenBaseDir;
156 | //}
157 |
158 | /// <summary>
159 | /// Gets the "src" attribute values of the resource HTML nodes.
160 | /// </summary>
161 | /// <param name="nodes">The HTML node collection.</param>
162 | /// <returns>The "src" values, excluding those that are null, empty or white space.</returns>
163 | protected virtual string[] GetSrcAttributes(HtmlAgilityPack.HtmlNodeCollection nodes)
164 | {
165 |     return nodes
166 |         .Select(node => node.GetAttributeValue("src", null))
167 |         .Where(value => !String.IsNullOrWhiteSpace(value))
168 |         .ToArray();
169 | }
170 |
171 | /// <summary>Creates Uri objects from the "src" attribute values, keeping relative references and absolute references to the page's own host.</summary>
172 | /// <param name="documentUri">Uri of the page that contains the references.</param>
173 | /// <param name="documenBaseDir">Base address handed to CreateAbsoluteUri for relative references.</param>
174 | /// <param name="srcAttributes">The "src" attribute values.</param>
175 | /// <returns>The distinct absolute Uri objects.</returns>
176 | Uri[] ParseResourcesUris(Uri documentUri, string documenBaseDir, string[] srcAttributes)
177 | {
178 |     return srcAttributes
179 |         .Select(raw => { Uri candidate; return Uri.TryCreate(raw, UriKind.RelativeOrAbsolute, out candidate) ? candidate : null; })
180 |         .Where(candidate => candidate != null)
181 |         // Absolute references that point at another host are dropped.
182 |         .Where(candidate => !candidate.IsAbsoluteUri || candidate.Host == documentUri.Host)
183 |         .Select(candidate => candidate.IsAbsoluteUri ? candidate : CreateAbsoluteUri(documentUri, documenBaseDir, candidate))
184 |         .Distinct()
185 |         .ToArray();
186 | }
187 |
188 | /// <summary>
189 | /// Callback invoked once the resource URLs have been parsed, right before they are downloaded.
190 | /// </summary>
191 | /// <param name="resourceUris">The parsed resource URLs.</param>
192 | /// <param name="context">The context that was passed to Process.</param>
193 | protected virtual void OnResourceParsed(Uri[] resourceUris, HtmlStaticizeContext context)
194 | {
195 | // Empty here; the method is virtual so that derived classes can override it.
196 | }
197 |
198 |
199 | /// <summary>Turns a relative resource reference into an absolute Uri.</summary>
200 | /// <param name="documentUri">Uri of the page; supplies scheme, host and port for references that start with '/'.</param>
201 | /// <param name="documenBaseDir">Address that all other references are appended to, separated by '/'.</param>
202 | /// <param name="uri">The relative reference.</param>
203 | /// <returns>The absolute Uri.</returns>
204 | Uri CreateAbsoluteUri(Uri documentUri, string documenBaseDir, Uri uri)
205 | {
206 |     String reference = uri.ToString();
207 |     // A leading '/' is resolved against the site root, anything else against the base address.
208 |     bool rootRelative = reference[0] == '/';
209 |     String absolute = rootRelative
210 |         ? String.Format("{0}://{1}:{2}{3}", documentUri.Scheme, documentUri.Host, documentUri.Port, reference)
211 |         : String.Format("{0}/{1}", documenBaseDir, reference);
212 |     return new Uri(absolute);
213 | }
214 |
215 |
216 | }
217 |
218 |
219 | }
220 |
--------------------------------------------------------------------------------
/Rhythm.Staticize/Staticizer.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 | using System.Web;
7 | using System.Net;
8 | //using HtmlAgilityPack;
9 |
10 | namespace Rhythm.Staticize
11 | {
12 | /// <summary>
13 | /// Delegate for the event raised when the key staticize state (StaticizeStep) changes.
14 | /// </summary>
15 | /// <param name="sender">The object that represents the current status.</param>
16 | /// <param name="e">The event arguments of the key state change.</param>
17 | public delegate void StaticizeStepChangedEventHandler(StaticizeStepStatus sender, StaticizeStepChangedEventArgs e);
18 |
19 | /// <summary>
20 | /// Entry point of the staticize library.
21 | /// </summary>
22 | public class Staticizer
23 | {
24 | /// <summary>
25 | /// Runs the staticize pipeline: downloads every page, then runs the behaviors and validations on each generated file.
26 | /// </summary>
27 | /// <param name="pages">Pages to staticize. Key is the absolute URL of a page, Value is the local path the page is saved to. URLs and Values must be unique.</param>
28 | /// <param name="stepTaken">Staticize status; pass an instance in, it lets other threads read the progress of the run.</param>
29 | /// <returns>The staticize status; the same reference as <paramref name="stepTaken"/>.</returns>
30 | public StaticizeStepStatus Staticize(IEnumerable<KeyValuePair<Uri, String>> pages, StaticizeStepStatus stepTaken)
31 | {
32 |     if (pages == null)
33 |     {
34 |         throw new ArgumentNullException("pages");
35 |     }
36 |     if (stepTaken == null)
37 |     {
38 |         throw new ArgumentNullException("stepTaken");
39 |     }
40 |     #region Initialization
41 |     stepTaken.Step = StaticizeStep.Initialize;
42 |
43 |     // Enumerate the sequence exactly once: Count() followed by foreach would run a lazy source twice.
44 |     KeyValuePair<Uri, String>[] pageList = pages.ToArray();
45 |     stepTaken.pageCount = pageList.Length;
46 |     // Create the Context objects, one Context per page.
47 |     HtmlStaticizeContext[] entries = new HtmlStaticizeContext[pageList.Length];
48 |     for (int i = 0; i < pageList.Length; i++)
49 |     {
50 |         entries[i] = new HtmlStaticizeContext
51 |         {
52 |             uri = pageList[i].Key,
53 |             fileName = pageList[i].Value,
54 |         };
55 |     }
56 |     stepTaken.Init(entries);
57 |
58 |     // Register the built-in validation only once, so that repeated calls do not accumulate duplicates.
59 |     if (m_Validations == null || !m_Validations.Contains(GenerationSuccessfulValidation.Instance))
60 |     {
61 |         AddValidation(GenerationSuccessfulValidation.Instance);
62 |     }
63 |     #endregion
64 |
65 |
66 |     stepTaken.Step = StaticizeStep.GenerationHtml;
67 |
68 |     // Generate the HTML files.
69 |     Generate(entries, stepTaken);
70 |     stepTaken.Step = StaticizeStep.GenerationHtmlCompleted;
71 |
72 |     #region Validation
73 |
74 |     stepTaken.Step = StaticizeStep.Validation;
75 |     if (
76 |         (m_Behaviors != null && m_Behaviors.Count > 0)
77 |         || (m_Validations != null && m_Validations.Count > 0)
78 |     )
79 |     {
80 |         for (int j = 0; j < entries.Length; j++)
81 |         {
82 |             var entry = entries[j];
83 |             // A non-null generationError means that generating the HTML failed.
84 |             if (entry.generationError != null)
85 |             {
86 |                 var ex = entry.generationError;
87 |                 var vd = new ValidationResult()
88 |                 {
89 |                     ValidationType = ValidationType.Tag,
90 |                     Uri = entry.uri,
91 |                     Name = "页面HTML是否成功生成。",
92 |                     Message = string.Format("生成HTML期间发生错误:{0}\r\n{1}\r\n", ex.Message, ex.ToString()),
93 |                     Exception = ex,
94 |                 };
95 |                 entry.validationResults = new ValidationResult[] { vd };
96 |                 stepTaken.ValidationErrors.Add(vd);
97 |                 stepTaken.AddValidatedPageCount(); // same counter call as the other two paths of this loop
98 |                 continue;
99 |             }
100 |             // load document dom
101 |             var doc = new HtmlAgilityPack.HtmlDocument();
102 |             // Try to load the document.
103 |             try
104 |             {
105 |                 doc.Load(entry.fileName, System.Text.Encoding.UTF8);
106 |             }
107 |             catch (Exception ex)
108 |             {
109 |                 // Loading the document failed.
110 |                 entry.DocumentLoadError = ex;
111 |                 var vd = new ValidationResult()
112 |                 {
113 |                     ValidationType = ValidationType.Tag,
114 |                     Uri = entry.uri,
115 |                     Name = "页面HTML是否成功生成。",
116 |                     Message = string.Format("加载HTML文档树期间发生错误:{0}\r\n{1}\r\n", ex.Message, ex.ToString()),
117 |                     Exception = ex,
118 |                 };
119 |                 entry.validationResults = new ValidationResult[] { vd };
120 |                 stepTaken.ValidationErrors.Add(vd);
121 |                 stepTaken.AddValidatedPageCount();
122 |                 continue;
123 |             }
124 |
125 |             if (m_Behaviors != null && m_Behaviors.Count > 0)
126 |             {
127 |                 for (int k = 0; k < m_Behaviors.Count; k++)
128 |                 {
129 |                     m_Behaviors[k].Process(doc, entry);
130 |                 }
131 |             }
132 |             if (m_Validations != null && m_Validations.Count > 0)
133 |             {
134 |                 Validate(doc, entry, stepTaken);
135 |             }
136 |             stepTaken.AddValidatedPageCount();
137 |         }
138 |     }
139 |     stepTaken.Step = StaticizeStep.ValidationCompleted;
140 |
141 |     #endregion
142 |
143 |     // add context errors results to status
144 |     {
145 |         var all = stepTaken.Errors;
146 |         for (int i = 0; i < entries.Length; i++)
147 |         {
148 |             var items = entries[i].Errors;
149 |             if (items != null && items.Count > 0)
150 |             {
151 |                 all.AddRange(items);
152 |             }
153 |         }
154 |     }
155 |
156 |     stepTaken.Step = StaticizeStep.Completed;
157 |     return stepTaken;
158 | }
159 |
160 | /// <summary>Calls Validate on the registered validation list for one loaded page and records the returned results.</summary>
161 | /// <param name="doc">The loaded HTML document.</param>
162 | /// <param name="context">Context of the page; the results are stored in, or appended to, its validationResults.</param>
163 | /// <param name="stepTaken">Status object; the results are appended to its ValidationErrors.</param>
164 | void Validate(HtmlAgilityPack.HtmlDocument doc, HtmlStaticizeContext context, StaticizeStepStatus stepTaken)
165 | {
166 |     if (this.m_Validations == null)
167 |     {
168 |         return;
169 |     }
170 |     var outcome = m_Validations.Validate(doc, context);
171 |     if (outcome == null || outcome.Count <= 0)
172 |     {
173 |         return;
174 |     }
175 |     if (context.validationResults != null)
176 |     {
177 |         context.validationResults.AddRange(outcome);
178 |     }
179 |     else
180 |     {
181 |         context.validationResults = outcome;
182 |     }
183 |     stepTaken.ValidationErrors.AddRange(outcome);
184 | }
185 |
186 | /// <summary>
187 | /// Downloads every page to its local file in parallel and records failures on the context of the page.
188 | /// </summary>
189 | /// <param name="entries">One context per page; supplies the page uri and the target file name.</param>
190 | /// <param name="step">Status object; AddGeneratedPageCount is called on it after each successful download.</param>
191 | void Generate(HtmlStaticizeContext[] entries, StaticizeStepStatus step)
192 | {
193 |     System.Threading.Tasks.Parallel.ForEach(entries, (entry) =>
194 |     {
195 |         using (var wc = new WebClient())
196 |         {
197 |             bool downloaded = false;
198 |             try
199 |             {
200 |                 wc.DownloadFile(entry.uri, entry.fileName);
201 |                 downloaded = true;
202 |                 step.AddGeneratedPageCount();
203 |             }
204 |             catch (Exception ex)
205 |             {
206 |                 entry.generationError = ex;
207 |                 entry.Errors.Add(ex);
208 |                 if (!downloaded)
209 |                 {
210 |                     // A failed WebClient download can leave an empty file behind; do not keep it as if it were a page.
211 |                     DeleteIncompleteFile(entry);
212 |                 }
213 |             }
214 |         }
215 |     });
216 | }
217 |
218 | /// <summary>Best-effort removal of the file left by a failed download; a failure to delete is recorded in the entry's Errors.</summary>
219 | static void DeleteIncompleteFile(HtmlStaticizeContext entry)
220 | {
221 |     try
222 |     {
223 |         System.IO.File.Delete(entry.fileName);
224 |     }
225 |     catch (Exception deleteError)
226 |     {
227 |         entry.Errors.Add(deleteError);
228 |     }
229 | }
230 |
231 |
232 | List<IBehavior> m_Behaviors;
233 | /// <summary>
234 | /// Adds IBehavior instances; they are executed after the HTML has been generated and loaded,
235 | /// so a behavior can read the HTML. Behaviors are executed before the validations.
236 | /// </summary>
237 | /// <param name="behaviors">The behaviors to add.</param>
238 | /// <returns>This instance.</returns>
239 | public Staticizer AddBehavior(params IBehavior[] behaviors)
240 | {
241 |     if (behaviors == null)
242 |     {
243 |         throw new ArgumentNullException("behaviors");
244 |     }
245 |     if (this.m_Behaviors == null)
246 |     {
247 |         this.m_Behaviors = new List<IBehavior>(behaviors);
248 |     }
249 |     else
250 |     {
251 |         this.m_Behaviors.AddRange(behaviors);
252 |     }
253 |     return this;
254 | }
255 |
256 | List<IValidation> m_Validations;
257 |
258 | /// <summary>
259 | /// Adds custom validation rules.
260 | /// </summary>
261 | /// <param name="validations">The custom validation rules.</param>
262 | /// <returns>This instance.</returns>
263 | public Staticizer AddValidation(params IValidation[] validations)
264 | {
265 |     if (validations == null)
266 |     {
267 |         throw new ArgumentNullException("validations");
268 |     }
269 |     if (this.m_Validations == null)
270 |     {
271 |         this.m_Validations = new List<IValidation>(validations);
272 |     }
273 |     else
274 |     {
275 |         this.m_Validations.AddRange(validations);
276 |     }
277 |     return this;
278 | }
279 |
280 | }
281 | }
--------------------------------------------------------------------------------