├── .gitignore
├── HtmlParserSharp.sln
├── HtmlParserSharp
    ├── Common
    │   ├── Attributes.cs
    │   ├── DoctypeExpectation.cs
    │   ├── DocumentMode.cs
    │   ├── DocumentModeEventArgs.cs
    │   ├── EncodingDetectedEventArgs.cs
    │   ├── ITokenHandler.cs
    │   ├── ParserErrorEventArgs.cs
    │   └── XmlViolationPolicy.cs
    ├── Core
    │   ├── AttributeName.cs
    │   ├── CharsetState.cs
    │   ├── CoalescingTreeBuilder.cs
    │   ├── DispatchGroup.cs
    │   ├── DomTreeBuilder.cs
    │   ├── ElementName.cs
    │   ├── HtmlAttributes.cs
    │   ├── ILocator.cs
    │   ├── ITreeBuilderState.cs
    │   ├── InsertionMode.cs
    │   ├── Locator.cs
    │   ├── NCName.cs
    │   ├── NamedCharacterAccel.cs
    │   ├── NamedCharacters.cs
    │   ├── Portability.cs
    │   ├── StackNode.cs
    │   ├── StateSnapshot.cs
    │   ├── TaintableLocator.cs
    │   ├── Tokenizer.cs
    │   ├── TreeBuilder.cs
    │   ├── TreeBuilderConstants.cs
    │   └── UTF16Buffer.cs
    ├── HtmlParserSharp.csproj
    ├── Program.cs
    ├── Properties
    │   └── AssemblyInfo.cs
    ├── SampleData
    │   └── test.html
    └── SimpleHtmlParser.cs
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | *.suo
2 | obj/
3 | bin/


--------------------------------------------------------------------------------
/HtmlParserSharp.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 11.00
 3 | # Visual Studio 2010
 4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HtmlParserSharp", "HtmlParserSharp\HtmlParserSharp.csproj", "{FD150915-D34F-436A-92C1-80AA505DA754}"
 5 | EndProject
 6 | Global
 7 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 8 | 		Debug|Any CPU = Debug|Any CPU
 9 | 		Release|Any CPU = Release|Any CPU
10 | 	EndGlobalSection
11 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
12 | 		{FD150915-D34F-436A-92C1-80AA505DA754}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
13 | 		{FD150915-D34F-436A-92C1-80AA505DA754}.Debug|Any CPU.Build.0 = Debug|Any CPU
14 | 		{FD150915-D34F-436A-92C1-80AA505DA754}.Release|Any CPU.ActiveCfg = Release|Any CPU
15 | 		{FD150915-D34F-436A-92C1-80AA505DA754}.Release|Any CPU.Build.0 = Release|Any CPU
16 | 	EndGlobalSection
17 | 	GlobalSection(SolutionProperties) = preSolution
18 | 		HideSolutionNode = FALSE
19 | 	EndGlobalSection
20 | EndGlobal
21 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Common/Attributes.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2012 Patrick Reisert
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 5 |  * copy of this software and associated documentation files (the "Software"), 
 6 |  * to deal in the Software without restriction, including without limitation 
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 8 |  * and/or sell copies of the Software, and to permit persons to whom the 
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in 
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | using System;
24 | 
25 | namespace HtmlParserSharp.Common
26 | {
27 | 	// Marker attributes (no members) that correspond to the annotations @NsUri, @Prefix and
28 | 	// @Local in the Java code; each may be applied to fields, parameters and methods. Probably we can safely remove these.
29 | 
30 | 	[AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Method)]
31 | 	public class NsUriAttribute : Attribute	{ } // counterpart of the Java @NsUri annotation
32 | 
33 | 	[AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Method)]
34 | 	public class PrefixAttribute : Attribute { } // counterpart of the Java @Prefix annotation
35 | 
36 | 	[AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Method)]
37 | 	public class LocalAttribute : Attribute { } // counterpart of the Java @Local annotation
38 | }
39 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Common/DoctypeExpectation.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2007 Henri Sivonen
 3 |  * Copyright (c) 2012 Patrick Reisert
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 6 |  * copy of this software and associated documentation files (the "Software"), 
 7 |  * to deal in the Software without restriction, including without limitation 
 8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 9 |  * and/or sell copies of the Software, and to permit persons to whom the 
10 |  * Software is furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in 
13 |  * all copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
21 |  * DEALINGS IN THE SOFTWARE.
22 |  */
23 | 
24 | using System;
25 | using System.Collections.Generic;
26 | using System.Linq;
27 | using System.Text;
28 | 
29 | namespace HtmlParserSharp.Common
30 | {
31 | 	/// <summary>
32 | 	/// Used for indicating desired behavior with legacy doctypes.
33 | 	/// </summary>
34 | 	public enum DoctypeExpectation
35 | 	{
36 | 		/// <summary>
37 | 		/// Be a pure HTML5 parser.
38 | 		/// </summary>
39 | 		Html,
40 | 
41 | 		/// <summary>
42 | 		/// Require the HTML 4.01 Transitional public id. Turn on HTML4-specific
43 | 		/// additional errors regardless of doctype.
44 | 		/// </summary>
45 | 		Html401Transitional,
46 | 
47 | 		/// <summary>
48 | 		/// Require the HTML 4.01 Transitional public id and a system id. Turn on
49 | 		/// HTML4-specific additional errors regardless of doctype.
50 | 		/// </summary>
51 | 		Html401Strict, // NOTE(review): the summary says "Transitional" although this member is named Strict - confirm which public id is meant
52 | 
53 | 		/// <summary>
54 | 		/// Treat the doctype required by HTML 5, doctypes with the HTML 4.01 Strict
55 | 		/// public id and doctypes with the HTML 4.01 Transitional public id and a
56 | 		/// system id as non-errors. Turn on HTML4-specific additional errors if the
57 | 		/// public id is the HTML 4.01 Strict or Transitional public id.
58 | 		/// </summary>
59 | 		Auto,
60 | 
61 | 		/// <summary>
62 | 		/// Never enable HTML4-specific error checks. Never report any doctype
63 | 		/// condition as an error. (Doctype tokens in wrong places will be
64 | 		/// reported as errors, though.) The application may decide what to log
65 | 		/// in response to document mode notifications (see <c>DocumentModeEventArgs</c>). This mode
66 | 		/// is meant for doing surveys on existing content.
67 | 		/// </summary>
68 | 		NoDoctypeErrors
69 | 	}
70 | }
71 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Common/DocumentMode.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2007 Henri Sivonen
 3 |  * Copyright (c) 2008 Mozilla Foundation
 4 |  * Copyright (c) 2012 Patrick Reisert
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 7 |  * copy of this software and associated documentation files (the "Software"), 
 8 |  * to deal in the Software without restriction, including without limitation 
 9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
10 |  * and/or sell copies of the Software, and to permit persons to whom the 
11 |  * Software is furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in 
14 |  * all copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
22 |  * DEALINGS IN THE SOFTWARE.
23 |  */
24 | 
25 | namespace HtmlParserSharp.Common
26 | {
27 | 	public enum DocumentMode // document mode value; carried by DocumentModeEventArgs.Mode
28 | 	{
29 | 		/// <summary>
30 | 		/// The Standards Mode
31 | 		/// </summary>
32 | 		StandardsMode,
33 | 
34 | 		/// <summary>
35 | 		/// The Limited Quirks Mode, a.k.a. the Almost Standards Mode
36 | 		/// </summary>
37 | 		AlmostStandardsMode,
38 | 
39 | 		/// <summary>
40 | 		/// The Quirks Mode
41 | 		/// </summary>
42 | 		///
43 | 		QuirksMode
44 | 	}
45 | }
46 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Common/DocumentModeEventArgs.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2012 Patrick Reisert
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 5 |  * copy of this software and associated documentation files (the "Software"), 
 6 |  * to deal in the Software without restriction, including without limitation 
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 8 |  * and/or sell copies of the Software, and to permit persons to whom the 
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in 
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | using System;
24 | 
25 | namespace HtmlParserSharp.Common
26 | {
27 | 	public class DocumentModeEventArgs : EventArgs // event data describing a document mode and the doctype identifiers that came with it
28 | 	{
29 | 		public DocumentMode Mode { get; private set; } // the document mode
30 | 		public string PublicIdentifier { get; private set; } // public identifier of the doctype, or null if unavailable
31 | 		public string SystemIdentifier { get; private set; } // system identifier of the doctype, or null if unavailable
32 | 		public bool Html4SpecificAdditionalErrorChecks { get; private set; } // true if HTML 4-specific checks were enabled
33 | 
34 | 		/// <summary>
35 | 		/// Initializes the event data; each argument is stored unchanged in the property of the same name.
36 | 		/// </summary>
37 | 		/// <param name="mode">The document mode.</param>
38 | 		/// <param name="publicIdentifier">The public identifier of the doctype or <c>null</c> if unavailable.</param>
39 | 		/// <param name="systemIdentifier">The system identifier of the doctype or <c>null</c> if unavailable.</param>
40 | 		/// <param name="html4SpecificAdditionalErrorChecks"><c>true</c> if HTML 4-specific checks were enabled,
41 | 		/// <c>false</c> otherwise</param>
42 | 		public DocumentModeEventArgs(DocumentMode mode, string publicIdentifier, string systemIdentifier, bool html4SpecificAdditionalErrorChecks)
43 | 		{
44 | 			Mode = mode;
45 | 			PublicIdentifier = publicIdentifier;
46 | 			SystemIdentifier = systemIdentifier;
47 | 			Html4SpecificAdditionalErrorChecks = html4SpecificAdditionalErrorChecks;
48 | 		}
49 | 	}
50 | }
51 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Common/EncodingDetectedEventArgs.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2012 Patrick Reisert
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 5 |  * copy of this software and associated documentation files (the "Software"), 
 6 |  * to deal in the Software without restriction, including without limitation 
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 8 |  * and/or sell copies of the Software, and to permit persons to whom the 
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in 
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | using System;
24 | 
25 | namespace HtmlParserSharp.Common
26 | {
27 | 	public class EncodingDetectedEventArgs : EventArgs // event data carrying an encoding name plus a writable accept flag
28 | 	{
29 | 		public string Encoding { get; private set; } // the encoding name given to the constructor
30 | 		public bool AcceptEncoding { get; set; } // publicly settable; presumably a handler sets it to true to accept the encoding - TODO confirm where the event is raised
31 | 
32 | 		public EncodingDetectedEventArgs(string encoding)
33 | 		{
34 | 			Encoding = encoding;
35 | 			AcceptEncoding = false; // starts out false until someone sets it
36 | 		}
37 | 	}
38 | }
39 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Common/ITokenHandler.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2007 Henri Sivonen
  3 |  * Copyright (c) 2008-2010 Mozilla Foundation
  4 |  * Copyright (c) 2012 Patrick Reisert
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  7 |  * copy of this software and associated documentation files (the "Software"), 
  8 |  * to deal in the Software without restriction, including without limitation 
  9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 10 |  * and/or sell copies of the Software, and to permit persons to whom the 
 11 |  * Software is furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in 
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 22 |  * DEALINGS IN THE SOFTWARE.
 23 |  */
 24 | 
 25 | using HtmlParserSharp.Core;
 26 | 
 27 | namespace HtmlParserSharp.Common
 28 | {
 29 | 	/// <summary>
 30 | 	/// <code>Tokenizer</code> reports tokens through this interface.
 31 | 	/// </summary>
 32 | 	public interface ITokenHandler
 33 | 	{
 34 | 
 35 | 		/// <summary>
 36 | 		/// This method is called at the start of tokenization before any other
 37 | 		/// methods on this interface are called. Implementations should hold the
 38 | 		/// reference to the <code>Tokenizer</code> in order to set the content
 39 | 		/// model flag and in order to be able to query for <code>Locator</code> data.
 40 | 		/// </summary>
 41 | 		/// <param name="self">The Tokenizer.</param>
 42 | 		void StartTokenization(Tokenizer self);
 43 | 
 44 | 		/// <summary>
 45 | 		/// If this handler implementation cares about comments, return <code>true</code>.
 46 | 		/// If not, return <code>false</code>.
 47 | 		/// </summary>
 48 | 		/// <value>Whether this handler wants comments</value>
 49 | 		bool WantsComments { get; }
 50 | 
 51 | 		/// <summary>
 52 | 		/// Receive a doctype token.
 53 | 		/// </summary>
 54 | 		/// <param name="name">The name.</param>
 55 | 		/// <param name="publicIdentifier">The public identifier.</param>
 56 | 		/// <param name="systemIdentifier">The system identifier.</param>
 57 | 		/// <param name="forceQuirks">Whether the token is correct. NOTE(review): the name suggests "force quirks mode" instead - confirm the polarity.</param>
 58 | 		void Doctype(string name, string publicIdentifier, string systemIdentifier, bool forceQuirks);
 59 | 
 60 | 		/// <summary>
 61 | 		/// Receive a start tag token.
 62 | 		/// </summary>
 63 | 		/// <param name="eltName">The tag name.</param>
 64 | 		/// <param name="attributes">The attributes.</param>
 65 | 		/// <param name="selfClosing">Presumably whether the tag was written in self-closing form - TODO confirm against the tokenizer.</param>
 66 | 		void StartTag(ElementName eltName, HtmlAttributes attributes, bool selfClosing);
 67 | 
 68 | 		/// <summary>
 69 | 		/// Receive an end tag token.
 70 | 		/// </summary>
 71 | 		/// <param name="eltName">The tag name.</param>
 72 | 		void EndTag(ElementName eltName);
 73 | 
 74 | 		/// <summary>
 75 | 		/// Receive a comment token. The data is junk if <see cref="WantsComments"/>
 76 | 		/// returned <code>false</code>.
 77 | 		/// </summary>
 78 | 		/// <param name="buf">The buffer holding the data.</param>
 79 | 		/// <param name="start">The offset into the buffer.</param>
 80 | 		/// <param name="length">The number of code units to read.</param>
 81 | 		void Comment(char[] buf, int start, int length);
 82 | 
 83 | 		/// <summary>
 84 | 		/// Receive character tokens. This method has the same semantics as the SAX
 85 | 		/// method of the same name.
 86 | 		/// </summary>
 87 | 		/// <param name="buf">A buffer holding the data.</param>
 88 | 		/// <param name="start">The offset into the buffer.</param>
 89 | 		/// <param name="length">The number of code units to read.</param>
 90 | 		void Characters(char[] buf, int start, int length);
 91 | 
 92 | 		/// <summary>
 93 | 		/// Reports a U+0000 that's being turned into a U+FFFD.
 94 | 		/// </summary>
 95 | 		void ZeroOriginatingReplacementCharacter();
 96 | 
 97 | 		/// <summary>
 98 | 		/// The end-of-file token.
 99 | 		/// </summary>
100 | 		void Eof();
101 | 
102 | 		/// <summary>
103 | 		/// Performs final cleanup.
104 | 		/// </summary>
105 | 		void EndTokenization();
106 | 
107 | 		/// <summary>
108 | 		/// Checks if the CDATA sections are allowed.
109 | 		/// </summary>
110 | 		/// <value><c>true</c> if CDATA sections are allowed</value>
111 | 		bool IsCDataSectionAllowed { get; }
112 | 	}
113 | }
114 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Common/ParserErrorEventArgs.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2012 Patrick Reisert
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 5 |  * copy of this software and associated documentation files (the "Software"), 
 6 |  * to deal in the Software without restriction, including without limitation 
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 8 |  * and/or sell copies of the Software, and to permit persons to whom the 
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in 
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | using System;
24 | 
25 | namespace HtmlParserSharp.Common
26 | {
27 | 	public class ParserErrorEventArgs : EventArgs // event data carrying a parser message and a warning flag
28 | 	{
29 | 		public string Message { get; private set; } // the message text given to the constructor
30 | 		public bool IsWarning { get; private set; } // the isWarning flag given to the constructor
31 | 
32 | 		public ParserErrorEventArgs(string message, bool isWarning)
33 | 		{
34 | 			Message = message;
35 | 			IsWarning = isWarning;
36 | 		}
37 | 	}
38 | }
39 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Common/XmlViolationPolicy.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2007 Henri Sivonen
 3 |  * Copyright (c) 2012 Patrick Reisert
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 6 |  * copy of this software and associated documentation files (the "Software"), 
 7 |  * to deal in the Software without restriction, including without limitation 
 8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 9 |  * and/or sell copies of the Software, and to permit persons to whom the 
10 |  * Software is furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in 
13 |  * all copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
21 |  * DEALINGS IN THE SOFTWARE.
22 |  */
23 | 
24 | using System;
25 | using System.Collections.Generic;
26 | using System.Linq;
27 | using System.Text;
28 | 
29 | namespace HtmlParserSharp.Common
30 | {
31 | 	/// <summary>
32 | 	/// Policy for XML 1.0 violations: what to do when something cannot be mapped to XML 1.0.
33 | 	/// </summary>
34 | 	///
35 | 	public enum XmlViolationPolicy
36 | 	{
37 | 		/// <summary>
38 | 		/// Conform to HTML 5, allow XML 1.0 to be violated.
39 | 		/// </summary>
40 | 		Allow,
41 | 
42 | 		/// <summary>
43 | 		/// Halt when something cannot be mapped to XML 1.0.
44 | 		/// </summary>
45 | 		Fatal,
46 | 
47 | 		/// <summary>
48 | 		/// Be non-conforming and alter the infoset to fit
49 | 		/// XML 1.0 when something would otherwise not be
50 | 		/// mappable to XML 1.0.
51 | 		/// </summary>
52 | 		AlterInfoset
53 | 	}
54 | }
55 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/CharsetState.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2007 Henri Sivonen
 3 |  * Copyright (c) 2007-2011 Mozilla Foundation
 4 |  * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla 
 5 |  * Foundation, and Opera Software ASA.
 6 |  * Copyright (c) 2012 Patrick Reisert
 7 |  *
 8 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 9 |  * copy of this software and associated documentation files (the "Software"), 
10 |  * to deal in the Software without restriction, including without limitation 
11 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
12 |  * and/or sell copies of the Software, and to permit persons to whom the 
13 |  * Software is furnished to do so, subject to the following conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be included in 
16 |  * all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
19 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
20 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
21 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
22 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
24 |  * DEALINGS IN THE SOFTWARE.
25 |  */
26 | 
27 | namespace HtmlParserSharp.Core
28 | {
29 | 	public enum CharsetState // presumably the states of a matcher for the word "charset", an '=' and a value - TODO confirm at the usage site
30 | 	{
31 | 		CHARSET_INITIAL = 0, // explicit value 0, so also the default value of the enum type
32 | 
33 | 		CHARSET_C = 1, // members CHARSET_C .. CHARSET_T are named after the letters c-h-a-r-s-e-t, in that order
34 | 
35 | 		CHARSET_H = 2,
36 | 
37 | 		CHARSET_A = 3,
38 | 
39 | 		CHARSET_R = 4,
40 | 
41 | 		CHARSET_S = 5,
42 | 
43 | 		CHARSET_E = 6,
44 | 
45 | 		CHARSET_T = 7,
46 | 
47 | 		CHARSET_EQUALS = 8,
48 | 
49 | 		CHARSET_SINGLE_QUOTED = 9, // the last three members distinguish single-quoted, double-quoted and unquoted forms
50 | 
51 | 		CHARSET_DOUBLE_QUOTED = 10,
52 | 
53 | 		CHARSET_UNQUOTED = 11
54 | 	}
55 | }
56 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/CoalescingTreeBuilder.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2008-2010 Mozilla Foundation
 3 |  * Copyright (c) 2012 Patrick Reisert
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 6 |  * copy of this software and associated documentation files (the "Software"), 
 7 |  * to deal in the Software without restriction, including without limitation 
 8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 9 |  * and/or sell copies of the Software, and to permit persons to whom the 
10 |  * Software is furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in 
13 |  * all copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
21 |  * DEALINGS IN THE SOFTWARE.
22 |  */
23 | 
24 | using System;
25 | 
26 | namespace HtmlParserSharp.Core
27 | {
28 | 	/// <summary>
29 | 	/// A common superclass for tree builders that coalesce their text nodes.
30 | 	/// Every <c>char[]</c>-based hook is converted to a string and forwarded to an
31 | 	/// abstract string-based overload that subclasses implement.
32 | 	/// </summary>
33 | 	public abstract class CoalescingTreeBuilder<T> : TreeBuilder<T> where T : class
34 | 	{
35 | 		/// <summary>
36 | 		/// Appends <paramref name="length"/> chars of <paramref name="buf"/>, starting at
37 | 		/// <paramref name="start"/>, to <c>charBuffer</c> and advances <c>charBufferLen</c>.
38 | 		/// </summary>
39 | 		protected override void AccumulateCharacters(char[] buf, int start, int length)
40 | 		{
41 | 			int newLen = charBufferLen + length;
42 | 			if (newLen > charBuffer.Length)
43 | 			{
44 | 				// Grow at least geometrically: an exact-fit buffer would need a fresh
45 | 				// allocation and a full copy on the very next append that exceeds it.
46 | 				char[] newBuf = new char[Math.Max(newLen, charBuffer.Length * 2)];
47 | 				Array.Copy(charBuffer, newBuf, charBufferLen);
48 | 				charBuffer = newBuf;
49 | 			}
50 | 			Array.Copy(buf, start, charBuffer, charBufferLen, length);
51 | 			charBufferLen = newLen;
52 | 		}
53 | 
54 | 		/// <summary>Forwards the buffer slice, as a string, to the string-based overload.</summary>
55 | 		protected override void AppendCharacters(T parent, char[] buf, int start, int length)
56 | 		{
57 | 			AppendCharacters(parent, new String(buf, start, length));
58 | 		}
59 | 
60 | 		/// <summary>Passes the fixed isindex prompt text to the string-based AppendCharacters.</summary>
61 | 		protected override void AppendIsindexPrompt(T parent)
62 | 		{
63 | 			AppendCharacters(parent, "This is a searchable index. Enter search keywords: ");
64 | 		}
65 | 
66 | 		/// <summary>Subclass hook that receives the characters as a string.</summary>
67 | 		protected abstract void AppendCharacters(T parent, string text);
68 | 
69 | 		/// <summary>Forwards the buffer slice, as a string, to the string-based overload.</summary>
70 | 		protected override void AppendComment(T parent, char[] buf, int start, int length)
71 | 		{
72 | 			AppendComment(parent, new String(buf, start, length));
73 | 		}
74 | 
75 | 		/// <summary>Subclass hook that receives the comment as a string.</summary>
76 | 		protected abstract void AppendComment(T parent, string comment);
77 | 
78 | 		/// <summary>Forwards the buffer slice, as a string, to the string-based overload.</summary>
79 | 		protected override void AppendCommentToDocument(char[] buf, int start, int length)
80 | 		{
81 | 			AppendCommentToDocument(new String(buf, start, length));
82 | 		}
83 | 
84 | 		/// <summary>Subclass hook that receives the document-level comment as a string.</summary>
85 | 		protected abstract void AppendCommentToDocument(string comment);
86 | 
87 | 		/// <summary>Forwards the buffer slice, as a string, to the string-based overload.</summary>
88 | 		protected override void InsertFosterParentedCharacters(char[] buf, int start,
89 | 				int length, T table, T stackParent)
90 | 		{
91 | 			InsertFosterParentedCharacters(new String(buf, start, length), table, stackParent);
92 | 		}
93 | 
94 | 		/// <summary>Subclass hook that receives the foster-parented text as a string.</summary>
95 | 		protected abstract void InsertFosterParentedCharacters(string text, T table, T stackParent);
96 | 	}
97 | 
98 | }
99 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/DispatchGroup.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2007 Henri Sivonen
  3 |  * Copyright (c) 2007-2011 Mozilla Foundation
  4 |  * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla 
  5 |  * Foundation, and Opera Software ASA.
  6 |  * Copyright (c) 2012 Patrick Reisert
  7 |  *
  8 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  9 |  * copy of this software and associated documentation files (the "Software"), 
 10 |  * to deal in the Software without restriction, including without limitation 
 11 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 12 |  * and/or sell copies of the Software, and to permit persons to whom the 
 13 |  * Software is furnished to do so, subject to the following conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be included in 
 16 |  * all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 19 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 20 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 21 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 22 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 24 |  * DEALINGS IN THE SOFTWARE.
 25 |  */
 26 | 
 27 | namespace HtmlParserSharp.Core
 28 | {
 29 | 	public enum DispatchGroup // numeric group ids named after HTML/SVG/MathML elements; presumably used to dispatch on element names - TODO confirm at usage sites
 30 | 	{
 31 | 		OTHER = 0, // explicit value 0, so also the default value of the enum type
 32 | 
 33 | 		A = 1,
 34 | 
 35 | 		BASE = 2,
 36 | 
 37 | 		BODY = 3,
 38 | 
 39 | 		BR = 4,
 40 | 
 41 | 		BUTTON = 5,
 42 | 
 43 | 		CAPTION = 6,
 44 | 
 45 | 		COL = 7,
 46 | 
 47 | 		COLGROUP = 8,
 48 | 
 49 | 		FORM = 9,
 50 | 
 51 | 		FRAME = 10,
 52 | 
 53 | 		FRAMESET = 11,
 54 | 
 55 | 		IMAGE = 12,
 56 | 
 57 | 		INPUT = 13,
 58 | 
 59 | 		ISINDEX = 14,
 60 | 
 61 | 		LI = 15,
 62 | 
 63 | 		LINK_OR_BASEFONT_OR_BGSOUND = 16, // members named X_OR_Y appear to cover several element names with one id - confirm
 64 | 
 65 | 		MATH = 17,
 66 | 
 67 | 		META = 18,
 68 | 
 69 | 		SVG = 19,
 70 | 
 71 | 		HEAD = 20,
 72 | 
 73 | 		HR = 22, // note: the value 21 is not assigned to any member of this enum
 74 | 
 75 | 		HTML = 23,
 76 | 
 77 | 		NOBR = 24,
 78 | 
 79 | 		NOFRAMES = 25,
 80 | 
 81 | 		NOSCRIPT = 26,
 82 | 
 83 | 		OPTGROUP = 27,
 84 | 
 85 | 		OPTION = 28,
 86 | 
 87 | 		P = 29,
 88 | 
 89 | 		PLAINTEXT = 30,
 90 | 
 91 | 		SCRIPT = 31,
 92 | 
 93 | 		SELECT = 32,
 94 | 
 95 | 		STYLE = 33,
 96 | 
 97 | 		TABLE = 34,
 98 | 
 99 | 		TEXTAREA = 35,
100 | 
101 | 		TITLE = 36,
102 | 
103 | 		TR = 37,
104 | 
105 | 		XMP = 38,
106 | 
107 | 		TBODY_OR_THEAD_OR_TFOOT = 39,
108 | 
109 | 		TD_OR_TH = 40,
110 | 
111 | 		DD_OR_DT = 41,
112 | 
113 | 		H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 = 42,
114 | 
115 | 		MARQUEE_OR_APPLET = 43,
116 | 
117 | 		PRE_OR_LISTING = 44,
118 | 
119 | 		B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U = 45,
120 | 
121 | 		UL_OR_OL_OR_DL = 46,
122 | 
123 | 		IFRAME = 47,
124 | 
125 | 		EMBED_OR_IMG = 48,
126 | 
127 | 		AREA_OR_WBR = 49,
128 | 
129 | 		DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU = 50,
130 | 
131 | 		ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY = 51,
132 | 
133 | 		RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR = 52,
134 | 
135 | 		RT_OR_RP = 53,
136 | 
137 | 		COMMAND = 54,
138 | 
139 | 		PARAM_OR_SOURCE_OR_TRACK = 55,
140 | 
141 | 		MGLYPH_OR_MALIGNMARK = 56,
142 | 
143 | 		MI_MO_MN_MS_MTEXT = 57,
144 | 
145 | 		ANNOTATION_XML = 58,
146 | 
147 | 		FOREIGNOBJECT_OR_DESC = 59,
148 | 
149 | 		NOEMBED = 60,
150 | 
151 | 		FIELDSET = 61,
152 | 
153 | 		OUTPUT_OR_LABEL = 62,
154 | 
155 | 		OBJECT = 63,
156 | 
157 | 		FONT = 64,
158 | 
159 | 		KEYGEN = 65 // highest value in this enum
160 | 	}
161 | }
162 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/DomTreeBuilder.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2007 Henri Sivonen
  3 |  * Copyright (c) 2008-2010 Mozilla Foundation
  4 |  * Copyright (c) 2012 Patrick Reisert
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  7 |  * copy of this software and associated documentation files (the "Software"), 
  8 |  * to deal in the Software without restriction, including without limitation 
  9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 10 |  * and/or sell copies of the Software, and to permit persons to whom the 
 11 |  * Software is furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in 
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 22 |  * DEALINGS IN THE SOFTWARE.
 23 |  */
 24 | 
 25 | using System;
 26 | using System.Collections.Generic;
 27 | using System.Linq;
 28 | using System.Text;
 29 | using System.Xml;
 30 | using HtmlParserSharp.Common;
 31 | using HtmlParserSharp.Core;
 32 | 
 33 | namespace HtmlParserSharp
 34 | {
	/// <summary>
	/// The tree builder glue for building a tree through the public DOM APIs.
	/// Nodes are created in, and attached to, a <see cref="XmlDocument"/>.
	/// </summary>
	class DomTreeBuilder : CoalescingTreeBuilder<XmlElement>
	{
		/// <summary>
		/// The document currently being built. Assigned in <see cref="Start"/> and
		/// reset to null by <see cref="getDocumentFragment"/>.
		/// </summary>
		private XmlDocument document;

		/// <summary>
		/// Sets on <paramref name="element"/> every attribute from
		/// <paramref name="attributes"/> that the element does not already have
		/// (matched by local name and namespace URI). Existing attributes are kept.
		/// </summary>
		/// <param name="element">The element to receive the attributes.</param>
		/// <param name="attributes">The attributes to merge in.</param>
		override protected void AddAttributesToElement(XmlElement element, HtmlAttributes attributes)
		{
			for (int i = 0; i < attributes.Length; i++)
			{
				string localName = attributes.GetLocalName(i);
				string uri = attributes.GetURI(i);
				if (!element.HasAttribute(localName, uri))
				{
					element.SetAttribute(localName, uri, attributes.GetValue(i));
				}
			}
		}

		/// <summary>
		/// Appends <paramref name="text"/> to <paramref name="parent"/>, extending the
		/// last child when that child is already a text node.
		/// </summary>
		/// <param name="parent">The element that receives the text.</param>
		/// <param name="text">The characters to append.</param>
		override protected void AppendCharacters(XmlElement parent, string text)
		{
			MergeOrAppendTextNode(parent, text);
		}

		/// <summary>
		/// Moves all children of <paramref name="oldParent"/> to the end of
		/// <paramref name="newParent"/>, preserving their order.
		/// </summary>
		/// <param name="oldParent">The element whose children are moved.</param>
		/// <param name="newParent">The element that receives the children.</param>
		override protected void AppendChildrenToNewParent(XmlElement oldParent, XmlElement newParent)
		{
			// AppendChild re-parents the node, so FirstChild advances on every pass.
			while (oldParent.HasChildNodes)
			{
				newParent.AppendChild(oldParent.FirstChild);
			}
		}

		/// <summary>
		/// Creates a document type node and appends it to the document.
		/// Empty public/system identifiers are passed to the DOM as null.
		/// </summary>
		/// <param name="name">The doctype name.</param>
		/// <param name="publicIdentifier">The public identifier; empty is treated as absent.</param>
		/// <param name="systemIdentifier">The system identifier; empty is treated as absent.</param>
		protected override void AppendDoctypeToDocument(string name, string publicIdentifier, string systemIdentifier)
		{
			// TODO: this method was not there originally. is it correct?

			// The resolver is cleared while the doctype node is created, presumably so
			// that no external DTD is resolved at that point - confirm.
			document.XmlResolver = null;

			if (publicIdentifier == String.Empty)
			{
				publicIdentifier = null;
			}
			if (systemIdentifier == String.Empty)
			{
				systemIdentifier = null;
			}

			var doctype = document.CreateDocumentType(name, publicIdentifier, systemIdentifier, null);

			// NOTE(review): this installs an XmlUrlResolver on the document rather than
			// leaving the resolver cleared. Kept for compatibility; consider whether
			// consumers of the resulting document really need URL resolution.
			document.XmlResolver = new XmlUrlResolver();
			document.AppendChild(doctype);
		}

		/// <summary>
		/// Appends a comment node with the given text to <paramref name="parent"/>.
		/// </summary>
		/// <param name="parent">The element that receives the comment.</param>
		/// <param name="comment">The comment text.</param>
		override protected void AppendComment(XmlElement parent, String comment)
		{
			parent.AppendChild(document.CreateComment(comment));
		}

		/// <summary>
		/// Appends a comment node with the given text directly to the document.
		/// </summary>
		/// <param name="comment">The comment text.</param>
		override protected void AppendCommentToDocument(String comment)
		{
			document.AppendChild(document.CreateComment(comment));
		}

		/// <summary>
		/// Creates a detached element in the given namespace and sets all of
		/// <paramref name="attributes"/> on it.
		/// </summary>
		/// <param name="ns">The namespace URI of the element.</param>
		/// <param name="name">The element name.</param>
		/// <param name="attributes">The attributes to set on the new element.</param>
		/// <returns>The newly created element.</returns>
		override protected XmlElement CreateElement(string ns, string name, HtmlAttributes attributes)
		{
			XmlElement rv = document.CreateElement(name, ns);
			// FIXME: attributes whose type is "ID" are not marked as ID attributes
			// (the original code called setIdAttributeNS(null, localName, true)).
			SetAllAttributes(rv, attributes);
			return rv;
		}

		/// <summary>
		/// Creates the XHTML-namespaced <c>html</c> element, sets all of
		/// <paramref name="attributes"/> on it and appends it to the document.
		/// </summary>
		/// <param name="attributes">The attributes to set on the root element.</param>
		/// <returns>The newly created root element.</returns>
		override protected XmlElement CreateHtmlElementSetAsRoot(HtmlAttributes attributes)
		{
			XmlElement rv = document.CreateElement("html", "http://www.w3.org/1999/xhtml");
			SetAllAttributes(rv, attributes);
			document.AppendChild(rv);
			return rv;
		}

		/// <summary>
		/// Appends <paramref name="child"/> as the last child of <paramref name="newParent"/>.
		/// </summary>
		/// <param name="child">The element to append.</param>
		/// <param name="newParent">The element that receives the child.</param>
		override protected void AppendElement(XmlElement child, XmlElement newParent)
		{
			newParent.AppendChild(child);
		}

		/// <summary>
		/// Reports whether <paramref name="element"/> has any child nodes.
		/// </summary>
		/// <param name="element">The element to inspect.</param>
		/// <returns>True when the element has at least one child node.</returns>
		override protected bool HasChildren(XmlElement element)
		{
			return element.HasChildNodes;
		}

		/// <summary>
		/// Creates a detached element like the three-argument overload.
		/// The <paramref name="form"/> argument is currently not recorded on the element.
		/// </summary>
		/// <param name="ns">The namespace URI of the element.</param>
		/// <param name="name">The element name.</param>
		/// <param name="attributes">The attributes to set on the new element.</param>
		/// <param name="form">The form element passed by the caller; unused for now.</param>
		/// <returns>The newly created element.</returns>
		override protected XmlElement CreateElement(string ns, string name, HtmlAttributes attributes, XmlElement form)
		{
			XmlElement rv = CreateElement(ns, name, attributes);
			//rv.setUserData("nu.validator.form-pointer", form, null); // TODO
			return rv;
		}

		/// <summary>
		/// Begins a new parse by replacing the current document with an empty one.
		/// </summary>
		/// <param name="fragment">Whether a fragment is being parsed; currently ignored.</param>
		override protected void Start(bool fragment)
		{
			document = new XmlDocument(); // implementation.createDocument(null, null, null);
			// TODO: fragment?
		}

		/// <summary>
		/// Receives the document mode. Currently a no-op: the mode is not stored.
		/// </summary>
		protected override void ReceiveDocumentMode(DocumentMode mode, String publicIdentifier,
				String systemIdentifier, bool html4SpecificAdditionalErrorChecks)
		{
			//document.setUserData("nu.validator.document-mode", mode, null); // TODO
		}

		/// <summary>
		/// Gets the document being built. Null after
		/// <see cref="getDocumentFragment"/> has been called.
		/// </summary>
		internal XmlDocument Document
		{
			get
			{
				return document;
			}
		}

		/// <summary>
		/// Moves the children of the document's root element into a new document
		/// fragment and releases the builder's reference to the document.
		/// </summary>
		/// <returns>
		/// The document fragment; empty when the document has no root element.
		/// </returns>
		internal XmlDocumentFragment getDocumentFragment()
		{
			XmlDocumentFragment rv = document.CreateDocumentFragment();
			// DocumentElement is the root element even if another node precedes it,
			// and is null for an empty document, which must not crash here.
			XmlNode rootElt = document.DocumentElement;
			if (rootElt != null)
			{
				while (rootElt.HasChildNodes)
				{
					rv.AppendChild(rootElt.FirstChild);
				}
			}
			document = null;
			return rv;
		}

		/// <summary>
		/// Inserts foster-parented text. When <paramref name="table"/> has a parent,
		/// the text goes immediately before the table (merged into a preceding text
		/// node if there is one); otherwise it is appended to
		/// <paramref name="stackParent"/>.
		/// </summary>
		/// <param name="text">The characters to insert.</param>
		/// <param name="table">The table element the text is foster-parented around.</param>
		/// <param name="stackParent">The fallback parent used when the table is detached.</param>
		override protected void InsertFosterParentedCharacters(string text, XmlElement table, XmlElement stackParent)
		{
			XmlNode parent = table.ParentNode;
			if (parent == null)
			{
				MergeOrAppendTextNode(stackParent, text);
				return;
			}

			// parent is always an element if not null
			XmlNode previousSibling = table.PreviousSibling;
			if (previousSibling != null && previousSibling.NodeType == XmlNodeType.Text)
			{
				((XmlText) previousSibling).Data += text;
				return;
			}
			parent.InsertBefore(document.CreateTextNode(text), table);
		}

		/// <summary>
		/// Inserts a foster-parented element: before <paramref name="table"/> when the
		/// table has a parent, otherwise at the end of <paramref name="stackParent"/>.
		/// </summary>
		/// <param name="child">The element to insert.</param>
		/// <param name="table">The table element the child is foster-parented around.</param>
		/// <param name="stackParent">The fallback parent used when the table is detached.</param>
		override protected void InsertFosterParentedChild(XmlElement child, XmlElement table, XmlElement stackParent)
		{
			XmlNode parent = table.ParentNode;
			if (parent != null)
			{
				// always an element if not null
				parent.InsertBefore(child, table);
			}
			else
			{
				stackParent.AppendChild(child);
			}
		}

		/// <summary>
		/// Removes <paramref name="element"/> from its parent, if it has one.
		/// </summary>
		/// <param name="element">The element to detach.</param>
		override protected void DetachFromParent(XmlElement element)
		{
			XmlNode parent = element.ParentNode;
			if (parent != null)
			{
				parent.RemoveChild(element);
			}
		}

		/// <summary>
		/// Appends text to <paramref name="parent"/>: extends the last child when it
		/// is a text node, otherwise appends a new text node.
		/// </summary>
		private void MergeOrAppendTextNode(XmlNode parent, string text)
		{
			XmlNode lastChild = parent.LastChild;
			if (lastChild != null && lastChild.NodeType == XmlNodeType.Text)
			{
				((XmlText) lastChild).Data += text;
				return;
			}
			parent.AppendChild(document.CreateTextNode(text));
		}

		/// <summary>
		/// Sets every attribute in <paramref name="attributes"/> on
		/// <paramref name="target"/>, overwriting attributes that already exist.
		/// </summary>
		private static void SetAllAttributes(XmlElement target, HtmlAttributes attributes)
		{
			for (int i = 0; i < attributes.Length; i++)
			{
				target.SetAttribute(attributes.GetLocalName(i), attributes.GetURI(i), attributes.GetValue(i));
			}
		}
	}
214 | }
215 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/ElementName.cs:
--------------------------------------------------------------------------------
   1 | ﻿/*
   2 |  * Copyright (c) 2008-2011 Mozilla Foundation
   3 |  * Copyright (c) 2012 Patrick Reisert
   4 |  *
   5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
   6 |  * copy of this software and associated documentation files (the "Software"), 
   7 |  * to deal in the Software without restriction, including without limitation 
   8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
   9 |  * and/or sell copies of the Software, and to permit persons to whom the 
  10 |  * Software is furnished to do so, subject to the following conditions:
  11 |  *
  12 |  * The above copyright notice and this permission notice shall be included in 
  13 |  * all copies or substantial portions of the Software.
  14 |  *
  15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
  18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
  19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
  20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
  21 |  * DEALINGS IN THE SOFTWARE.
  22 |  */
  23 | 
  24 | using System;
  25 | using HtmlParserSharp.Common;
  26 | 
  27 | namespace HtmlParserSharp.Core
  28 | {
  29 | 
  30 | 	public sealed class ElementName
  31 | 	// uncomment when regenerating self
  32 | 	//        implements Comparable<ElementName> 
  33 | 	{
  34 | 
		/// <summary>
		/// Bit mask that isolates the dispatch group held in the low seven bits
		/// of the flag word.
		/// </summary>
		public const int GROUP_MASK = 0x7F;

		/// <summary>
		/// Flag bit 30: the element is not a pre-interned element. Must never be
		/// set on pre-interned elements.
		/// </summary>
		public const int CUSTOM = 1 << 30;

		/// <summary>
		/// Flag bit 29: the element belongs to the "special" category. Only HTML
		/// specials should have this bit pre-set, not MathML or SVG specials.
		/// </summary>
		public const int SPECIAL = 1 << 29;

		/// <summary>
		/// Flag bit 28: the element is foster-parenting. Should be pre-set on
		/// elements that are foster-parenting as HTML.
		/// </summary>
		public const int FOSTER_PARENTING = 1 << 28;

		/// <summary>
		/// Flag bit 27: the element is scoping. Should be pre-set on elements
		/// that are scoping as HTML.
		/// </summary>
		public const int SCOPING = 1 << 27;

		/// <summary>
		/// Flag bit 26: the element is scoping as SVG.
		/// </summary>
		public const int SCOPING_AS_SVG = 1 << 26;

		/// <summary>
		/// Flag bit 25: the element is scoping as MathML.
		/// </summary>
		public const int SCOPING_AS_MATHML = 1 << 25;

		/// <summary>
		/// Flag bit 24: the element is an HTML integration point.
		/// </summary>
		public const int HTML_INTEGRATION_POINT = 1 << 24;

		/// <summary>
		/// Flag bit 23: the element has an optional end tag.
		/// </summary>
		public const int OPTIONAL_END_TAG = 1 << 23;
  83 | 
		/// <summary>
		/// Shared instance whose name is null. It is built through the
		/// single-argument constructor, so it carries the CUSTOM flag and the
		/// OTHER dispatch group.
		/// </summary>
		public static readonly ElementName NULL_ELEMENT_NAME = new ElementName(null);

		/// <summary>
		/// The element name. Lookups by buffer compare the buffer contents
		/// against this value.
		/// </summary>
		[Local]
		public readonly string name;

		/// <summary>
		/// The camel-case spelling of the name. For names created through the
		/// single-argument constructor this is the same string as <see cref="name"/>.
		/// </summary>
		[Local]
		public readonly string camelCaseName;

		/// <summary>
		/// The lowest 7 bits are the dispatch group. The high bits are flags.
		/// </summary>
		public readonly int flags;
  96 | 
		/// <summary>
		/// Gets the complete flag word: dispatch group in the low bits plus the
		/// flag bits above it.
		/// </summary>
		public int Flags
		{
			get { return this.flags; }
		}
 104 | 
 105 | 		public DispatchGroup Group
 106 | 		{
 107 | 			get
 108 | 			{
 109 | 				return (DispatchGroup)(flags & GROUP_MASK);
 110 | 			}
 111 | 		}
 112 | 
 113 | 		// [NOCPP[
 114 | 
 115 | 		public bool IsCustom
 116 | 		{
 117 | 			get
 118 | 			{
 119 | 				return (flags & CUSTOM) != 0;
 120 | 			}
 121 | 		}
 122 | 
 123 | 		// ]NOCPP]
 124 | 
 125 | 		internal static ElementName ElementNameByBuffer(char[] buf, int offset, int length)
 126 | 		{
 127 | 			int hash = ElementName.BufToHash(buf, length);
 128 | 			int index = Array.BinarySearch<int>(ElementName.ELEMENT_HASHES, hash);
 129 | 			if (index < 0)
 130 | 			{
 131 | 				return new ElementName(Portability.NewLocalNameFromBuffer(buf, offset, length));
 132 | 			}
 133 | 			else
 134 | 			{
 135 | 				ElementName elementName = ElementName.ELEMENT_NAMES[index];
 136 | 				/*[Local]*/
 137 | 				string name = elementName.name;
 138 | 				if (!Portability.LocalEqualsBuffer(name, buf, offset, length))
 139 | 				{
 140 | 					return new ElementName(Portability.NewLocalNameFromBuffer(buf,
 141 | 							offset, length));
 142 | 				}
 143 | 				return elementName;
 144 | 			}
 145 | 		}
 146 | 
 147 | 		/// <summary>
 148 | 		/// This method has to return a unique integer for each well-known
 149 | 		/// lower-cased element name.
 150 | 		/// </summary>
 151 | 		private static int BufToHash(char[] buf, int len)
 152 | 		{
 153 | 			int hash = len;
 154 | 			hash <<= 5;
 155 | 			hash += buf[0] - 0x60;
 156 | 			int j = len;
 157 | 			for (int i = 0; i < 4 && j > 0; i++)
 158 | 			{
 159 | 				j--;
 160 | 				hash <<= 5;
 161 | 				hash += buf[j] - 0x60;
 162 | 			}
 163 | 			return hash;
 164 | 		}
 165 | 
 166 | 		private ElementName([Local] string name, [Local] string camelCaseName, int flags)
 167 | 		{
 168 | 			this.name = name;
 169 | 			this.camelCaseName = camelCaseName;
 170 | 			this.flags = flags;
 171 | 		}
 172 | 
 173 | 		internal ElementName([Local] string name)
 174 | 		{
 175 | 			this.name = name;
 176 | 			this.camelCaseName = name;
 177 | 			this.flags = (int) DispatchGroup.OTHER | CUSTOM;
 178 | 		}
 179 | 
 180 | 		/*virtual*/	public ElementName CloneElementName()
 181 | 		{
 182 | 			return this;
 183 | 		}
 184 | 
 185 | 		// START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate
 186 | 
 187 | 		///// <summary>
 188 | 		///// Returns a <see cref="System.String"/> that represents this instance.
 189 | 		///// </summary>
 190 | 		///// <returns>
 191 | 		///// A <see cref="System.String"/> that represents this instance.
 192 | 		///// </returns>
 193 | 		//override public string ToString() {
 194 | 		//    return "(\"" + name + "\", \"" + camelCaseName + "\", " + decomposedFlags() + ")";
 195 | 		//}
 196 | 
 197 | 		//private string DecomposedFlags() {
 198 | 		//    StringBuilder buf = new StringBuilder("TreeBuilderConstants.");
 199 | 		//    buf.Append(treeBuilderGroupToName());
 200 | 		//    if ((flags & SPECIAL) != 0) {
 201 | 		//        buf.Append(" | SPECIAL");
 202 | 		//    }
 203 | 		//    if ((flags & FOSTER_PARENTING) != 0) {
 204 | 		//        buf.Append(" | FOSTER_PARENTING");
 205 | 		//    }
 206 | 		//    if ((flags & SCOPING) != 0) {
 207 | 		//        buf.Append(" | SCOPING");
 208 | 		//    }        
 209 | 		//    if ((flags & SCOPING_AS_MATHML) != 0) {
 210 | 		//        buf.Append(" | SCOPING_AS_MATHML");
 211 | 		//    }
 212 | 		//    if ((flags & SCOPING_AS_SVG) != 0) {
 213 | 		//        buf.Append(" | SCOPING_AS_SVG");
 214 | 		//    }
 215 | 		//    if ((flags & OPTIONAL_END_TAG) != 0) {
 216 | 		//        buf.Append(" | OPTIONAL_END_TAG");
 217 | 		//    }
 218 | 		//    return buf.ToString();
 219 | 		//}
 220 | 
 221 | 		//private string constName() {
 222 | 		//    char[] buf = new char[name.Length];
 223 | 		//    for (int i = 0; i < name.Length; i++) {
 224 | 		//        char c = name[i];
 225 | 		//        if (c == '-') {
 226 | 		//            buf[i] = '_';
 227 | 		//        } else if (c >= '0' && c <= '9') {
 228 | 		//            buf[i] = c;
 229 | 		//        } else {
 230 | 		//            buf[i] = (char) (c - 0x20);
 231 | 		//        }
 232 | 		//    }
 233 | 		//    return new String(buf);
 234 | 		//}
 235 | 
 236 | 		//private int hash() {
 237 | 		//    return BufToHash(name.ToCharArray(), name.Length);
 238 | 		//}
 239 | 
 240 | 		//public int CompareTo(ElementName other) {
 241 | 		//    int thisHash = this.hash();
 242 | 		//    int otherHash = other.hash();
 243 | 		//    if (thisHash < otherHash) {
 244 | 		//        return -1;
 245 | 		//    } else if (thisHash == otherHash) {
 246 | 		//        return 0;
 247 | 		//    } else {
 248 | 		//        return 1;
 249 | 		//    }
 250 | 		//}
 251 | 
 252 | 		//private string TreeBuilderGroupToName() {
 253 | 		//    switch (GetGroup()) {
 254 | 		//        case TreeBuilderConstants.OTHER:
 255 | 		//            return "OTHER";
 256 | 		//        case TreeBuilderConstants.A:
 257 | 		//            return "A";
 258 | 		//        case TreeBuilderConstants.BASE:
 259 | 		//            return "BASE";
 260 | 		//        case TreeBuilderConstants.BODY:
 261 | 		//            return "BODY";
 262 | 		//        case TreeBuilderConstants.BR:
 263 | 		//            return "BR";
 264 | 		//        case TreeBuilderConstants.BUTTON:
 265 | 		//            return "BUTTON";
 266 | 		//        case TreeBuilderConstants.CAPTION:
 267 | 		//            return "CAPTION";
 268 | 		//        case TreeBuilderConstants.COL:
 269 | 		//            return "COL";
 270 | 		//        case TreeBuilderConstants.COLGROUP:
 271 | 		//            return "COLGROUP";
 272 | 		//        case TreeBuilderConstants.FONT:
 273 | 		//            return "FONT";
 274 | 		//        case TreeBuilderConstants.FORM:
 275 | 		//            return "FORM";
 276 | 		//        case TreeBuilderConstants.FRAME:
 277 | 		//            return "FRAME";
 278 | 		//        case TreeBuilderConstants.FRAMESET:
 279 | 		//            return "FRAMESET";
 280 | 		//        case TreeBuilderConstants.IMAGE:
 281 | 		//            return "IMAGE";
 282 | 		//        case TreeBuilderConstants.INPUT:
 283 | 		//            return "INPUT";
 284 | 		//        case TreeBuilderConstants.ISINDEX:
 285 | 		//            return "ISINDEX";
 286 | 		//        case TreeBuilderConstants.LI:
 287 | 		//            return "LI";
 288 | 		//        case TreeBuilderConstants.LINK_OR_BASEFONT_OR_BGSOUND:
 289 | 		//            return "LINK_OR_BASEFONT_OR_BGSOUND";
 290 | 		//        case TreeBuilderConstants.MATH:
 291 | 		//            return "MATH";
 292 | 		//        case TreeBuilderConstants.META:
 293 | 		//            return "META";
 294 | 		//        case TreeBuilderConstants.SVG:
 295 | 		//            return "SVG";
 296 | 		//        case TreeBuilderConstants.HEAD:
 297 | 		//            return "HEAD";
 298 | 		//        case TreeBuilderConstants.HR:
 299 | 		//            return "HR";
 300 | 		//        case TreeBuilderConstants.HTML:
 301 | 		//            return "HTML";
 302 | 		//        case TreeBuilderConstants.KEYGEN:
 303 | 		//            return "KEYGEN";
 304 | 		//        case TreeBuilderConstants.NOBR:
 305 | 		//            return "NOBR";
 306 | 		//        case TreeBuilderConstants.NOFRAMES:
 307 | 		//            return "NOFRAMES";
 308 | 		//        case TreeBuilderConstants.NOSCRIPT:
 309 | 		//            return "NOSCRIPT";
 310 | 		//        case TreeBuilderConstants.OPTGROUP:
 311 | 		//            return "OPTGROUP";
 312 | 		//        case TreeBuilderConstants.OPTION:
 313 | 		//            return "OPTION";
 314 | 		//        case TreeBuilderConstants.P:
 315 | 		//            return "P";
 316 | 		//        case TreeBuilderConstants.PLAINTEXT:
 317 | 		//            return "PLAINTEXT";
 318 | 		//        case TreeBuilderConstants.SCRIPT:
 319 | 		//            return "SCRIPT";
 320 | 		//        case TreeBuilderConstants.SELECT:
 321 | 		//            return "SELECT";
 322 | 		//        case TreeBuilderConstants.STYLE:
 323 | 		//            return "STYLE";
 324 | 		//        case TreeBuilderConstants.TABLE:
 325 | 		//            return "TABLE";
 326 | 		//        case TreeBuilderConstants.TEXTAREA:
 327 | 		//            return "TEXTAREA";
 328 | 		//        case TreeBuilderConstants.TITLE:
 329 | 		//            return "TITLE";
 330 | 		//        case TreeBuilderConstants.TR:
 331 | 		//            return "TR";
 332 | 		//        case TreeBuilderConstants.XMP:
 333 | 		//            return "XMP";
 334 | 		//        case TreeBuilderConstants.TBODY_OR_THEAD_OR_TFOOT:
 335 | 		//            return "TBODY_OR_THEAD_OR_TFOOT";
 336 | 		//        case TreeBuilderConstants.TD_OR_TH:
 337 | 		//            return "TD_OR_TH";
 338 | 		//        case TreeBuilderConstants.DD_OR_DT:
 339 | 		//            return "DD_OR_DT";
 340 | 		//        case TreeBuilderConstants.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
 341 | 		//            return "H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6";
 342 | 		//        case TreeBuilderConstants.OBJECT:
 343 | 		//            return "OBJECT";
 344 | 		//        case TreeBuilderConstants.OUTPUT_OR_LABEL:
 345 | 		//            return "OUTPUT_OR_LABEL";
 346 | 		//        case TreeBuilderConstants.MARQUEE_OR_APPLET:
 347 | 		//            return "MARQUEE_OR_APPLET";
 348 | 		//        case TreeBuilderConstants.PRE_OR_LISTING:
 349 | 		//            return "PRE_OR_LISTING";
 350 | 		//        case TreeBuilderConstants.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
 351 | 		//            return "B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U";
 352 | 		//        case TreeBuilderConstants.UL_OR_OL_OR_DL:
 353 | 		//            return "UL_OR_OL_OR_DL";
 354 | 		//        case TreeBuilderConstants.IFRAME:
 355 | 		//            return "IFRAME";
 356 | 		//        case TreeBuilderConstants.NOEMBED:
 357 | 		//            return "NOEMBED";
 358 | 		//        case TreeBuilderConstants.EMBED_OR_IMG:
 359 | 		//            return "EMBED_OR_IMG";
 360 | 		//        case TreeBuilderConstants.AREA_OR_WBR:
 361 | 		//            return "AREA_OR_WBR";
 362 | 		//        case TreeBuilderConstants.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
 363 | 		//            return "DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU";
 364 | 		//        case TreeBuilderConstants.FIELDSET:
 365 | 		//            return "FIELDSET";
 366 | 		//        case TreeBuilderConstants.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY:
 367 | 		//            return "ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY";
 368 | 		//        case TreeBuilderConstants.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR:
 369 | 		//            return "RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR";
 370 | 		//        case TreeBuilderConstants.RT_OR_RP:
 371 | 		//            return "RT_OR_RP";
 372 | 		//        case TreeBuilderConstants.COMMAND:
 373 | 		//            return "COMMAND";
 374 | 		//        case TreeBuilderConstants.PARAM_OR_SOURCE_OR_TRACK:
 375 | 		//            return "PARAM_OR_SOURCE_OR_TRACK";
 376 | 		//        case TreeBuilderConstants.MGLYPH_OR_MALIGNMARK:
 377 | 		//            return "MGLYPH_OR_MALIGNMARK";
 378 | 		//        case TreeBuilderConstants.MI_MO_MN_MS_MTEXT:
 379 | 		//            return "MI_MO_MN_MS_MTEXT";
 380 | 		//        case TreeBuilderConstants.ANNOTATION_XML:
 381 | 		//            return "ANNOTATION_XML";
 382 | 		//        case TreeBuilderConstants.FOREIGNOBJECT_OR_DESC:
 383 | 		//            return "FOREIGNOBJECT_OR_DESC";
 384 | 		//    }
 385 | 		//    return null;
 386 | 		//}
 387 | 
 388 | 		///**
 389 | 		// * Regenerate self
 390 | 		// * 
 391 | 		// * @param args
 392 | 		// */
 393 | 		//public static void main(String[] args) {
 394 | 		//    Arrays.sort(ELEMENT_NAMES);
 395 | 		//    for (int i = 1; i < ELEMENT_NAMES.length; i++) {
 396 | 		//        if (ELEMENT_NAMES[i].hash() == ELEMENT_NAMES[i - 1].hash()) {
 397 | 		//            System.err.println("Hash collision: " + ELEMENT_NAMES[i].name
 398 | 		//                    + ", " + ELEMENT_NAMES[i - 1].name);
 399 | 		//            return;
 400 | 		//        }
 401 | 		//    }
 402 | 		//    for (int i = 0; i < ELEMENT_NAMES.length; i++) {
 403 | 		//        ElementName el = ELEMENT_NAMES[i];
 404 | 		//        System.out.println("public static readonly ElementName "
 405 | 		//                + el.constName() + " = new ElementName" + el.toString()
 406 | 		//                + ";");
 407 | 		//    }
 408 | 		//    System.out.println("private final static @NoLength ElementName[] ELEMENT_NAMES = {");
 409 | 		//    for (int i = 0; i < ELEMENT_NAMES.length; i++) {
 410 | 		//        ElementName el = ELEMENT_NAMES[i];
 411 | 		//        System.out.println(el.constName() + ",");
 412 | 		//    }
 413 | 		//    System.out.println("};");
 414 | 		//    System.out.println("private final static int[] ELEMENT_HASHES = {");
 415 | 		//    for (int i = 0; i < ELEMENT_NAMES.length; i++) {
 416 | 		//        ElementName el = ELEMENT_NAMES[i];
 417 | 		//        System.out.println(Integer.toString(el.hash()) + ",");
 418 | 		//    }
 419 | 		//    System.out.println("};");
 420 | 		//}
 421 | 
 422 | 		// START GENERATED CODE
 423 | 		public static readonly ElementName A = new ElementName("a", "a", (int) DispatchGroup.A);
 424 | 		public static readonly ElementName B = new ElementName("b", "b", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 425 | 		public static readonly ElementName G = new ElementName("g", "g", (int) DispatchGroup.OTHER);
 426 | 		public static readonly ElementName I = new ElementName("i", "i", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 427 | 		public static readonly ElementName P = new ElementName("p", "p", (int) DispatchGroup.P | SPECIAL | OPTIONAL_END_TAG);
 428 | 		public static readonly ElementName Q = new ElementName("q", "q", (int) DispatchGroup.OTHER);
 429 | 		public static readonly ElementName S = new ElementName("s", "s", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 430 | 		public static readonly ElementName U = new ElementName("u", "u", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 431 | 		public static readonly ElementName BR = new ElementName("br", "br", (int) DispatchGroup.BR | SPECIAL);
 432 | 		public static readonly ElementName CI = new ElementName("ci", "ci", (int) DispatchGroup.OTHER);
 433 | 		public static readonly ElementName CN = new ElementName("cn", "cn", (int) DispatchGroup.OTHER);
 434 | 		public static readonly ElementName DD = new ElementName("dd", "dd", (int) DispatchGroup.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG);
 435 | 		public static readonly ElementName DL = new ElementName("dl", "dl", (int) DispatchGroup.UL_OR_OL_OR_DL | SPECIAL);
 436 | 		public static readonly ElementName DT = new ElementName("dt", "dt", (int) DispatchGroup.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG);
 437 | 		public static readonly ElementName EM = new ElementName("em", "em", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 438 | 		public static readonly ElementName EQ = new ElementName("eq", "eq", (int) DispatchGroup.OTHER);
 439 | 		public static readonly ElementName FN = new ElementName("fn", "fn", (int) DispatchGroup.OTHER);
 440 | 		public static readonly ElementName H1 = new ElementName("h1", "h1", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
 441 | 		public static readonly ElementName H2 = new ElementName("h2", "h2", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
 442 | 		public static readonly ElementName H3 = new ElementName("h3", "h3", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
 443 | 		public static readonly ElementName H4 = new ElementName("h4", "h4", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); // Run of two-letter element names. Each entry passes two name strings (identical throughout this run) and an int made of a DispatchGroup member OR-ed with optional flag constants.
 444 | 		public static readonly ElementName H5 = new ElementName("h5", "h5", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
 445 | 		public static readonly ElementName H6 = new ElementName("h6", "h6", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
 446 | 		public static readonly ElementName GT = new ElementName("gt", "gt", (int) DispatchGroup.OTHER); // OTHER entries in this run carry no flag constants
 447 | 		public static readonly ElementName HR = new ElementName("hr", "hr", (int) DispatchGroup.HR | SPECIAL);
 448 | 		public static readonly ElementName IN = new ElementName("in", "in", (int) DispatchGroup.OTHER);
 449 | 		public static readonly ElementName LI = new ElementName("li", "li", (int) DispatchGroup.LI | SPECIAL | OPTIONAL_END_TAG);
 450 | 		public static readonly ElementName LN = new ElementName("ln", "ln", (int) DispatchGroup.OTHER);
 451 | 		public static readonly ElementName LT = new ElementName("lt", "lt", (int) DispatchGroup.OTHER);
 452 | 		public static readonly ElementName MI = new ElementName("mi", "mi", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); // MI, MN, MO and MS share one dispatch group and the SCOPING_AS_MATHML flag
 453 | 		public static readonly ElementName MN = new ElementName("mn", "mn", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
 454 | 		public static readonly ElementName MO = new ElementName("mo", "mo", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
 455 | 		public static readonly ElementName MS = new ElementName("ms", "ms", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
 456 | 		public static readonly ElementName OL = new ElementName("ol", "ol", (int) DispatchGroup.UL_OR_OL_OR_DL | SPECIAL);
 457 | 		public static readonly ElementName OR = new ElementName("or", "or", (int) DispatchGroup.OTHER);
 458 | 		public static readonly ElementName PI = new ElementName("pi", "pi", (int) DispatchGroup.OTHER);
 459 | 		public static readonly ElementName RP = new ElementName("rp", "rp", (int) DispatchGroup.RT_OR_RP | OPTIONAL_END_TAG); // RP and RT: OPTIONAL_END_TAG only, no SPECIAL
 460 | 		public static readonly ElementName RT = new ElementName("rt", "rt", (int) DispatchGroup.RT_OR_RP | OPTIONAL_END_TAG);
 461 | 		public static readonly ElementName TD = new ElementName("td", "td", (int) DispatchGroup.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG); // TD and TH share group and flags
 462 | 		public static readonly ElementName TH = new ElementName("th", "th", (int) DispatchGroup.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG);
 463 | 		public static readonly ElementName TR = new ElementName("tr", "tr", (int) DispatchGroup.TR | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
 464 | 		public static readonly ElementName TT = new ElementName("tt", "tt", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 465 | 		public static readonly ElementName UL = new ElementName("ul", "ul", (int) DispatchGroup.UL_OR_OL_OR_DL | SPECIAL);
 466 | 		public static readonly ElementName AND = new ElementName("and", "and", (int) DispatchGroup.OTHER); // Run of three-letter element names; both name strings are identical for every entry in this run.
 467 | 		public static readonly ElementName ARG = new ElementName("arg", "arg", (int) DispatchGroup.OTHER);
 468 | 		public static readonly ElementName ABS = new ElementName("abs", "abs", (int) DispatchGroup.OTHER);
 469 | 		public static readonly ElementName BIG = new ElementName("big", "big", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 470 | 		public static readonly ElementName BDO = new ElementName("bdo", "bdo", (int) DispatchGroup.OTHER);
 471 | 		public static readonly ElementName CSC = new ElementName("csc", "csc", (int) DispatchGroup.OTHER);
 472 | 		public static readonly ElementName COL = new ElementName("col", "col", (int) DispatchGroup.COL | SPECIAL);
 473 | 		public static readonly ElementName COS = new ElementName("cos", "cos", (int) DispatchGroup.OTHER);
 474 | 		public static readonly ElementName COT = new ElementName("cot", "cot", (int) DispatchGroup.OTHER);
 475 | 		public static readonly ElementName DEL = new ElementName("del", "del", (int) DispatchGroup.OTHER);
 476 | 		public static readonly ElementName DFN = new ElementName("dfn", "dfn", (int) DispatchGroup.OTHER);
 477 | 		public static readonly ElementName DIR = new ElementName("dir", "dir", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 478 | 		public static readonly ElementName DIV = new ElementName("div", "div", (int) DispatchGroup.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
 479 | 		public static readonly ElementName EXP = new ElementName("exp", "exp", (int) DispatchGroup.OTHER);
 480 | 		public static readonly ElementName GCD = new ElementName("gcd", "gcd", (int) DispatchGroup.OTHER);
 481 | 		public static readonly ElementName GEQ = new ElementName("geq", "geq", (int) DispatchGroup.OTHER);
 482 | 		public static readonly ElementName IMG = new ElementName("img", "img", (int) DispatchGroup.EMBED_OR_IMG | SPECIAL);
 483 | 		public static readonly ElementName INS = new ElementName("ins", "ins", (int) DispatchGroup.OTHER);
 484 | 		public static readonly ElementName INT = new ElementName("int", "int", (int) DispatchGroup.OTHER);
 485 | 		public static readonly ElementName KBD = new ElementName("kbd", "kbd", (int) DispatchGroup.OTHER);
 486 | 		public static readonly ElementName LOG = new ElementName("log", "log", (int) DispatchGroup.OTHER);
 487 | 		public static readonly ElementName LCM = new ElementName("lcm", "lcm", (int) DispatchGroup.OTHER);
 488 | 		public static readonly ElementName LEQ = new ElementName("leq", "leq", (int) DispatchGroup.OTHER);
 489 | 		public static readonly ElementName MTD = new ElementName("mtd", "mtd", (int) DispatchGroup.OTHER);
 490 | 		public static readonly ElementName MIN = new ElementName("min", "min", (int) DispatchGroup.OTHER);
 491 | 		public static readonly ElementName MAP = new ElementName("map", "map", (int) DispatchGroup.OTHER);
 492 | 		public static readonly ElementName MTR = new ElementName("mtr", "mtr", (int) DispatchGroup.OTHER);
 493 | 		public static readonly ElementName MAX = new ElementName("max", "max", (int) DispatchGroup.OTHER);
 494 | 		public static readonly ElementName NEQ = new ElementName("neq", "neq", (int) DispatchGroup.OTHER);
 495 | 		public static readonly ElementName NOT = new ElementName("not", "not", (int) DispatchGroup.OTHER);
 496 | 		public static readonly ElementName NAV = new ElementName("nav", "nav", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 497 | 		public static readonly ElementName PRE = new ElementName("pre", "pre", (int) DispatchGroup.PRE_OR_LISTING | SPECIAL);
 498 | 		public static readonly ElementName REM = new ElementName("rem", "rem", (int) DispatchGroup.OTHER);
 499 | 		public static readonly ElementName SUB = new ElementName("sub", "sub", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); // SUB, SUP and VAR share this group with no flags
 500 | 		public static readonly ElementName SEC = new ElementName("sec", "sec", (int) DispatchGroup.OTHER);
 501 | 		public static readonly ElementName SVG = new ElementName("svg", "svg", (int) DispatchGroup.SVG); // dedicated dispatch group, no flags
 502 | 		public static readonly ElementName SUM = new ElementName("sum", "sum", (int) DispatchGroup.OTHER);
 503 | 		public static readonly ElementName SIN = new ElementName("sin", "sin", (int) DispatchGroup.OTHER);
 504 | 		public static readonly ElementName SEP = new ElementName("sep", "sep", (int) DispatchGroup.OTHER);
 505 | 		public static readonly ElementName SUP = new ElementName("sup", "sup", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
 506 | 		public static readonly ElementName SET = new ElementName("set", "set", (int) DispatchGroup.OTHER);
 507 | 		public static readonly ElementName TAN = new ElementName("tan", "tan", (int) DispatchGroup.OTHER);
 508 | 		public static readonly ElementName USE = new ElementName("use", "use", (int) DispatchGroup.OTHER);
 509 | 		public static readonly ElementName VAR = new ElementName("var", "var", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
 510 | 		public static readonly ElementName WBR = new ElementName("wbr", "wbr", (int) DispatchGroup.AREA_OR_WBR | SPECIAL);
 511 | 		public static readonly ElementName XMP = new ElementName("xmp", "xmp", (int) DispatchGroup.XMP); // NOTE(review): no SPECIAL flag here although PRE carries it; the HTML Standard's "special" element list appears to include xmp - confirm against upstream before changing
 512 | 		public static readonly ElementName XOR = new ElementName("xor", "xor", (int) DispatchGroup.OTHER);
 513 | 		public static readonly ElementName AREA = new ElementName("area", "area", (int) DispatchGroup.AREA_OR_WBR | SPECIAL); // Run of four-letter element names; both name strings are identical for every entry in this run.
 514 | 		public static readonly ElementName ABBR = new ElementName("abbr", "abbr", (int) DispatchGroup.OTHER);
 515 | 		public static readonly ElementName BASE = new ElementName("base", "base", (int) DispatchGroup.BASE | SPECIAL);
 516 | 		public static readonly ElementName BVAR = new ElementName("bvar", "bvar", (int) DispatchGroup.OTHER);
 517 | 		public static readonly ElementName BODY = new ElementName("body", "body", (int) DispatchGroup.BODY | SPECIAL | OPTIONAL_END_TAG);
 518 | 		public static readonly ElementName CARD = new ElementName("card", "card", (int) DispatchGroup.OTHER);
 519 | 		public static readonly ElementName CODE = new ElementName("code", "code", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 520 | 		public static readonly ElementName CITE = new ElementName("cite", "cite", (int) DispatchGroup.OTHER);
 521 | 		public static readonly ElementName CSCH = new ElementName("csch", "csch", (int) DispatchGroup.OTHER);
 522 | 		public static readonly ElementName COSH = new ElementName("cosh", "cosh", (int) DispatchGroup.OTHER);
 523 | 		public static readonly ElementName COTH = new ElementName("coth", "coth", (int) DispatchGroup.OTHER);
 524 | 		public static readonly ElementName CURL = new ElementName("curl", "curl", (int) DispatchGroup.OTHER);
 525 | 		public static readonly ElementName DESC = new ElementName("desc", "desc", (int) DispatchGroup.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG); // only entry in this run flagged SCOPING_AS_SVG
 526 | 		public static readonly ElementName DIFF = new ElementName("diff", "diff", (int) DispatchGroup.OTHER);
 527 | 		public static readonly ElementName DEFS = new ElementName("defs", "defs", (int) DispatchGroup.OTHER);
 528 | 		public static readonly ElementName FORM = new ElementName("form", "form", (int) DispatchGroup.FORM | SPECIAL);
 529 | 		public static readonly ElementName FONT = new ElementName("font", "font", (int) DispatchGroup.FONT);
 530 | 		public static readonly ElementName GRAD = new ElementName("grad", "grad", (int) DispatchGroup.OTHER);
 531 | 		public static readonly ElementName HEAD = new ElementName("head", "head", (int) DispatchGroup.HEAD | SPECIAL | OPTIONAL_END_TAG);
 532 | 		public static readonly ElementName HTML = new ElementName("html", "html", (int) DispatchGroup.HTML | SPECIAL | SCOPING | OPTIONAL_END_TAG); // combines SPECIAL, SCOPING and OPTIONAL_END_TAG
 533 | 		public static readonly ElementName LINE = new ElementName("line", "line", (int) DispatchGroup.OTHER);
 534 | 		public static readonly ElementName LINK = new ElementName("link", "link", (int) DispatchGroup.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
 535 | 		public static readonly ElementName LIST = new ElementName("list", "list", (int) DispatchGroup.OTHER);
 536 | 		public static readonly ElementName META = new ElementName("meta", "meta", (int) DispatchGroup.META | SPECIAL);
 537 | 		public static readonly ElementName MSUB = new ElementName("msub", "msub", (int) DispatchGroup.OTHER);
 538 | 		public static readonly ElementName MODE = new ElementName("mode", "mode", (int) DispatchGroup.OTHER);
 539 | 		public static readonly ElementName MATH = new ElementName("math", "math", (int) DispatchGroup.MATH); // dedicated dispatch group, no flags
 540 | 		public static readonly ElementName MARK = new ElementName("mark", "mark", (int) DispatchGroup.OTHER);
 541 | 		public static readonly ElementName MASK = new ElementName("mask", "mask", (int) DispatchGroup.OTHER);
 542 | 		public static readonly ElementName MEAN = new ElementName("mean", "mean", (int) DispatchGroup.OTHER);
 543 | 		public static readonly ElementName MSUP = new ElementName("msup", "msup", (int) DispatchGroup.OTHER);
 544 | 		public static readonly ElementName MENU = new ElementName("menu", "menu", (int) DispatchGroup.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
 545 | 		public static readonly ElementName MROW = new ElementName("mrow", "mrow", (int) DispatchGroup.OTHER);
 546 | 		public static readonly ElementName NONE = new ElementName("none", "none", (int) DispatchGroup.OTHER);
 547 | 		public static readonly ElementName NOBR = new ElementName("nobr", "nobr", (int) DispatchGroup.NOBR);
 548 | 		public static readonly ElementName NEST = new ElementName("nest", "nest", (int) DispatchGroup.OTHER);
 549 | 		public static readonly ElementName PATH = new ElementName("path", "path", (int) DispatchGroup.OTHER);
 550 | 		public static readonly ElementName PLUS = new ElementName("plus", "plus", (int) DispatchGroup.OTHER);
 551 | 		public static readonly ElementName RULE = new ElementName("rule", "rule", (int) DispatchGroup.OTHER);
 552 | 		public static readonly ElementName REAL = new ElementName("real", "real", (int) DispatchGroup.OTHER);
 553 | 		public static readonly ElementName RELN = new ElementName("reln", "reln", (int) DispatchGroup.OTHER);
 554 | 		public static readonly ElementName RECT = new ElementName("rect", "rect", (int) DispatchGroup.OTHER);
 555 | 		public static readonly ElementName ROOT = new ElementName("root", "root", (int) DispatchGroup.OTHER);
 556 | 		public static readonly ElementName RUBY = new ElementName("ruby", "ruby", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
 557 | 		public static readonly ElementName SECH = new ElementName("sech", "sech", (int) DispatchGroup.OTHER);
 558 | 		public static readonly ElementName SINH = new ElementName("sinh", "sinh", (int) DispatchGroup.OTHER);
 559 | 		public static readonly ElementName SPAN = new ElementName("span", "span", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
 560 | 		public static readonly ElementName SAMP = new ElementName("samp", "samp", (int) DispatchGroup.OTHER);
 561 | 		public static readonly ElementName STOP = new ElementName("stop", "stop", (int) DispatchGroup.OTHER);
 562 | 		public static readonly ElementName SDEV = new ElementName("sdev", "sdev", (int) DispatchGroup.OTHER);
 563 | 		public static readonly ElementName TIME = new ElementName("time", "time", (int) DispatchGroup.OTHER);
 564 | 		public static readonly ElementName TRUE = new ElementName("true", "true", (int) DispatchGroup.OTHER);
 565 | 		public static readonly ElementName TREF = new ElementName("tref", "tref", (int) DispatchGroup.OTHER);
 566 | 		public static readonly ElementName TANH = new ElementName("tanh", "tanh", (int) DispatchGroup.OTHER);
 567 | 		public static readonly ElementName TEXT = new ElementName("text", "text", (int) DispatchGroup.OTHER);
 568 | 		public static readonly ElementName VIEW = new ElementName("view", "view", (int) DispatchGroup.OTHER);
 569 | 		public static readonly ElementName ASIDE = new ElementName("aside", "aside", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); // Run of five-letter element names; both name strings are identical for every entry in this run.
 570 | 		public static readonly ElementName AUDIO = new ElementName("audio", "audio", (int) DispatchGroup.OTHER);
 571 | 		public static readonly ElementName APPLY = new ElementName("apply", "apply", (int) DispatchGroup.OTHER);
 572 | 		public static readonly ElementName EMBED = new ElementName("embed", "embed", (int) DispatchGroup.EMBED_OR_IMG | SPECIAL);
 573 | 		public static readonly ElementName FRAME = new ElementName("frame", "frame", (int) DispatchGroup.FRAME | SPECIAL);
 574 | 		public static readonly ElementName FALSE = new ElementName("false", "false", (int) DispatchGroup.OTHER);
 575 | 		public static readonly ElementName FLOOR = new ElementName("floor", "floor", (int) DispatchGroup.OTHER);
 576 | 		public static readonly ElementName GLYPH = new ElementName("glyph", "glyph", (int) DispatchGroup.OTHER);
 577 | 		public static readonly ElementName HKERN = new ElementName("hkern", "hkern", (int) DispatchGroup.OTHER);
 578 | 		public static readonly ElementName IMAGE = new ElementName("image", "image", (int) DispatchGroup.IMAGE | SPECIAL); // has its own IMAGE group, separate from IMG's EMBED_OR_IMG
 579 | 		public static readonly ElementName IDENT = new ElementName("ident", "ident", (int) DispatchGroup.OTHER);
 580 | 		public static readonly ElementName INPUT = new ElementName("input", "input", (int) DispatchGroup.INPUT | SPECIAL);
 581 | 		public static readonly ElementName LABEL = new ElementName("label", "label", (int) DispatchGroup.OUTPUT_OR_LABEL);
 582 | 		public static readonly ElementName LIMIT = new ElementName("limit", "limit", (int) DispatchGroup.OTHER);
 583 | 		public static readonly ElementName MFRAC = new ElementName("mfrac", "mfrac", (int) DispatchGroup.OTHER);
 584 | 		public static readonly ElementName MPATH = new ElementName("mpath", "mpath", (int) DispatchGroup.OTHER);
 585 | 		public static readonly ElementName METER = new ElementName("meter", "meter", (int) DispatchGroup.OTHER);
 586 | 		public static readonly ElementName MOVER = new ElementName("mover", "mover", (int) DispatchGroup.OTHER);
 587 | 		public static readonly ElementName MINUS = new ElementName("minus", "minus", (int) DispatchGroup.OTHER);
 588 | 		public static readonly ElementName MROOT = new ElementName("mroot", "mroot", (int) DispatchGroup.OTHER);
 589 | 		public static readonly ElementName MSQRT = new ElementName("msqrt", "msqrt", (int) DispatchGroup.OTHER);
 590 | 		public static readonly ElementName MTEXT = new ElementName("mtext", "mtext", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); // same group and flag as the two-letter MI/MN/MO/MS entries
 591 | 		public static readonly ElementName NOTIN = new ElementName("notin", "notin", (int) DispatchGroup.OTHER);
 592 | 		public static readonly ElementName PIECE = new ElementName("piece", "piece", (int) DispatchGroup.OTHER);
 593 | 		public static readonly ElementName PARAM = new ElementName("param", "param", (int) DispatchGroup.PARAM_OR_SOURCE_OR_TRACK | SPECIAL);
 594 | 		public static readonly ElementName POWER = new ElementName("power", "power", (int) DispatchGroup.OTHER);
 595 | 		public static readonly ElementName REALS = new ElementName("reals", "reals", (int) DispatchGroup.OTHER);
 596 | 		public static readonly ElementName STYLE = new ElementName("style", "style", (int) DispatchGroup.STYLE | SPECIAL);
 597 | 		public static readonly ElementName SMALL = new ElementName("small", "small", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 598 | 		public static readonly ElementName THEAD = new ElementName("thead", "thead", (int) DispatchGroup.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); // THEAD, TFOOT and TBODY share group and flags
 599 | 		public static readonly ElementName TABLE = new ElementName("table", "table", (int) DispatchGroup.TABLE | SPECIAL | FOSTER_PARENTING | SCOPING);
 600 | 		public static readonly ElementName TITLE = new ElementName("title", "title", (int) DispatchGroup.TITLE | SPECIAL | SCOPING_AS_SVG); // carries both SPECIAL and SCOPING_AS_SVG
 601 | 		public static readonly ElementName TRACK = new ElementName("track", "track", (int) DispatchGroup.PARAM_OR_SOURCE_OR_TRACK); // NOTE(review): no SPECIAL flag, unlike PARAM in the same dispatch group; the HTML Standard's "special" element list appears to include track - confirm against upstream before changing
 602 | 		public static readonly ElementName TSPAN = new ElementName("tspan", "tspan", (int) DispatchGroup.OTHER);
 603 | 		public static readonly ElementName TIMES = new ElementName("times", "times", (int) DispatchGroup.OTHER);
 604 | 		public static readonly ElementName TFOOT = new ElementName("tfoot", "tfoot", (int) DispatchGroup.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
 605 | 		public static readonly ElementName TBODY = new ElementName("tbody", "tbody", (int) DispatchGroup.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
 606 | 		public static readonly ElementName UNION = new ElementName("union", "union", (int) DispatchGroup.OTHER);
 607 | 		public static readonly ElementName VKERN = new ElementName("vkern", "vkern", (int) DispatchGroup.OTHER);
 608 | 		public static readonly ElementName VIDEO = new ElementName("video", "video", (int) DispatchGroup.OTHER);
 609 | 		public static readonly ElementName ARCSEC = new ElementName("arcsec", "arcsec", (int) DispatchGroup.OTHER); // Run of six-letter element names; the two name strings are identical except for FETILE.
 610 | 		public static readonly ElementName ARCCSC = new ElementName("arccsc", "arccsc", (int) DispatchGroup.OTHER);
 611 | 		public static readonly ElementName ARCTAN = new ElementName("arctan", "arctan", (int) DispatchGroup.OTHER);
 612 | 		public static readonly ElementName ARCSIN = new ElementName("arcsin", "arcsin", (int) DispatchGroup.OTHER);
 613 | 		public static readonly ElementName ARCCOS = new ElementName("arccos", "arccos", (int) DispatchGroup.OTHER);
 614 | 		public static readonly ElementName APPLET = new ElementName("applet", "applet", (int) DispatchGroup.MARQUEE_OR_APPLET | SPECIAL | SCOPING);
 615 | 		public static readonly ElementName ARCCOT = new ElementName("arccot", "arccot", (int) DispatchGroup.OTHER);
 616 | 		public static readonly ElementName APPROX = new ElementName("approx", "approx", (int) DispatchGroup.OTHER);
 617 | 		public static readonly ElementName BUTTON = new ElementName("button", "button", (int) DispatchGroup.BUTTON | SPECIAL);
 618 | 		public static readonly ElementName CIRCLE = new ElementName("circle", "circle", (int) DispatchGroup.OTHER);
 619 | 		public static readonly ElementName CENTER = new ElementName("center", "center", (int) DispatchGroup.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
 620 | 		public static readonly ElementName CURSOR = new ElementName("cursor", "cursor", (int) DispatchGroup.OTHER);
 621 | 		public static readonly ElementName CANVAS = new ElementName("canvas", "canvas", (int) DispatchGroup.OTHER);
 622 | 		public static readonly ElementName DIVIDE = new ElementName("divide", "divide", (int) DispatchGroup.OTHER);
 623 | 		public static readonly ElementName DEGREE = new ElementName("degree", "degree", (int) DispatchGroup.OTHER);
 624 | 		public static readonly ElementName DOMAIN = new ElementName("domain", "domain", (int) DispatchGroup.OTHER);
 625 | 		public static readonly ElementName EXISTS = new ElementName("exists", "exists", (int) DispatchGroup.OTHER);
 626 | 		public static readonly ElementName FETILE = new ElementName("fetile", "feTile", (int) DispatchGroup.OTHER); // second string is camelCase ("feTile"); presumably the case-preserved SVG name - confirm against the ElementName constructor
 627 | 		public static readonly ElementName FIGURE = new ElementName("figure", "figure", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 628 | 		public static readonly ElementName FORALL = new ElementName("forall", "forall", (int) DispatchGroup.OTHER);
 629 | 		public static readonly ElementName FILTER = new ElementName("filter", "filter", (int) DispatchGroup.OTHER);
 630 | 		public static readonly ElementName FOOTER = new ElementName("footer", "footer", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 631 | 		public static readonly ElementName HGROUP = new ElementName("hgroup", "hgroup", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 632 | 		public static readonly ElementName HEADER = new ElementName("header", "header", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 633 | 		public static readonly ElementName IFRAME = new ElementName("iframe", "iframe", (int) DispatchGroup.IFRAME | SPECIAL);
 634 | 		public static readonly ElementName KEYGEN = new ElementName("keygen", "keygen", (int) DispatchGroup.KEYGEN | SPECIAL);
 635 | 		public static readonly ElementName LAMBDA = new ElementName("lambda", "lambda", (int) DispatchGroup.OTHER);
 636 | 		public static readonly ElementName LEGEND = new ElementName("legend", "legend", (int) DispatchGroup.OTHER);
 637 | 		public static readonly ElementName MSPACE = new ElementName("mspace", "mspace", (int) DispatchGroup.OTHER);
 638 | 		public static readonly ElementName MTABLE = new ElementName("mtable", "mtable", (int) DispatchGroup.OTHER);
 639 | 		public static readonly ElementName MSTYLE = new ElementName("mstyle", "mstyle", (int) DispatchGroup.OTHER);
 640 | 		public static readonly ElementName MGLYPH = new ElementName("mglyph", "mglyph", (int) DispatchGroup.MGLYPH_OR_MALIGNMARK);
 641 | 		public static readonly ElementName MEDIAN = new ElementName("median", "median", (int) DispatchGroup.OTHER);
 642 | 		public static readonly ElementName MUNDER = new ElementName("munder", "munder", (int) DispatchGroup.OTHER);
 643 | 		public static readonly ElementName MARKER = new ElementName("marker", "marker", (int) DispatchGroup.OTHER);
 644 | 		public static readonly ElementName MERROR = new ElementName("merror", "merror", (int) DispatchGroup.OTHER);
 645 | 		public static readonly ElementName MOMENT = new ElementName("moment", "moment", (int) DispatchGroup.OTHER);
 646 | 		public static readonly ElementName MATRIX = new ElementName("matrix", "matrix", (int) DispatchGroup.OTHER);
 647 | 		public static readonly ElementName OPTION = new ElementName("option", "option", (int) DispatchGroup.OPTION | OPTIONAL_END_TAG); // OPTIONAL_END_TAG only, no SPECIAL
 648 | 		public static readonly ElementName OBJECT = new ElementName("object", "object", (int) DispatchGroup.OBJECT | SPECIAL | SCOPING);
 649 | 		public static readonly ElementName OUTPUT = new ElementName("output", "output", (int) DispatchGroup.OUTPUT_OR_LABEL);
 650 | 		public static readonly ElementName PRIMES = new ElementName("primes", "primes", (int) DispatchGroup.OTHER);
 651 | 		public static readonly ElementName SOURCE = new ElementName("source", "source", (int) DispatchGroup.PARAM_OR_SOURCE_OR_TRACK); // NOTE(review): no SPECIAL flag, unlike PARAM in the same dispatch group; the HTML Standard's "special" element list appears to include source - confirm against upstream before changing
 652 | 		public static readonly ElementName STRIKE = new ElementName("strike", "strike", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 653 | 		public static readonly ElementName STRONG = new ElementName("strong", "strong", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
 654 | 		public static readonly ElementName SWITCH = new ElementName("switch", "switch", (int) DispatchGroup.OTHER);
 655 | 		public static readonly ElementName SYMBOL = new ElementName("symbol", "symbol", (int) DispatchGroup.OTHER);
 656 | 		public static readonly ElementName SELECT = new ElementName("select", "select", (int) DispatchGroup.SELECT | SPECIAL);
 657 | 		public static readonly ElementName SUBSET = new ElementName("subset", "subset", (int) DispatchGroup.OTHER);
 658 | 		public static readonly ElementName SCRIPT = new ElementName("script", "script", (int) DispatchGroup.SCRIPT | SPECIAL);
 659 | 		public static readonly ElementName TBREAK = new ElementName("tbreak", "tbreak", (int) DispatchGroup.OTHER);
 660 | 		public static readonly ElementName VECTOR = new ElementName("vector", "vector", (int) DispatchGroup.OTHER);
 661 | 		public static readonly ElementName ARTICLE = new ElementName("article", "article", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); // Run of seven-letter element names; the two name strings are identical except for the FE* entries.
 662 | 		public static readonly ElementName ANIMATE = new ElementName("animate", "animate", (int) DispatchGroup.OTHER);
 663 | 		public static readonly ElementName ARCSECH = new ElementName("arcsech", "arcsech", (int) DispatchGroup.OTHER);
 664 | 		public static readonly ElementName ARCCSCH = new ElementName("arccsch", "arccsch", (int) DispatchGroup.OTHER);
 665 | 		public static readonly ElementName ARCTANH = new ElementName("arctanh", "arctanh", (int) DispatchGroup.OTHER);
 666 | 		public static readonly ElementName ARCSINH = new ElementName("arcsinh", "arcsinh", (int) DispatchGroup.OTHER);
 667 | 		public static readonly ElementName ARCCOSH = new ElementName("arccosh", "arccosh", (int) DispatchGroup.OTHER);
 668 | 		public static readonly ElementName ARCCOTH = new ElementName("arccoth", "arccoth", (int) DispatchGroup.OTHER);
 669 | 		public static readonly ElementName ACRONYM = new ElementName("acronym", "acronym", (int) DispatchGroup.OTHER);
 670 | 		public static readonly ElementName ADDRESS = new ElementName("address", "address", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 671 | 		public static readonly ElementName BGSOUND = new ElementName("bgsound", "bgsound", (int) DispatchGroup.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
 672 | 		public static readonly ElementName COMMAND = new ElementName("command", "command", (int) DispatchGroup.COMMAND | SPECIAL);
 673 | 		public static readonly ElementName COMPOSE = new ElementName("compose", "compose", (int) DispatchGroup.OTHER);
 674 | 		public static readonly ElementName CEILING = new ElementName("ceiling", "ceiling", (int) DispatchGroup.OTHER);
 675 | 		public static readonly ElementName CSYMBOL = new ElementName("csymbol", "csymbol", (int) DispatchGroup.OTHER);
 676 | 		public static readonly ElementName CAPTION = new ElementName("caption", "caption", (int) DispatchGroup.CAPTION | SPECIAL | SCOPING);
 677 | 		public static readonly ElementName DISCARD = new ElementName("discard", "discard", (int) DispatchGroup.OTHER);
 678 | 		public static readonly ElementName DECLARE = new ElementName("declare", "declare", (int) DispatchGroup.OTHER);
 679 | 		public static readonly ElementName DETAILS = new ElementName("details", "details", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 680 | 		public static readonly ElementName ELLIPSE = new ElementName("ellipse", "ellipse", (int) DispatchGroup.OTHER);
 681 | 		public static readonly ElementName FEFUNCA = new ElementName("fefunca", "feFuncA", (int) DispatchGroup.OTHER); // FE* entries: second string is the camelCase spelling; presumably the case-preserved SVG name - confirm against the ElementName constructor
 682 | 		public static readonly ElementName FEFUNCB = new ElementName("fefuncb", "feFuncB", (int) DispatchGroup.OTHER);
 683 | 		public static readonly ElementName FEBLEND = new ElementName("feblend", "feBlend", (int) DispatchGroup.OTHER);
 684 | 		public static readonly ElementName FEFLOOD = new ElementName("feflood", "feFlood", (int) DispatchGroup.OTHER);
 685 | 		public static readonly ElementName FEIMAGE = new ElementName("feimage", "feImage", (int) DispatchGroup.OTHER);
 686 | 		public static readonly ElementName FEMERGE = new ElementName("femerge", "feMerge", (int) DispatchGroup.OTHER);
 687 | 		public static readonly ElementName FEFUNCG = new ElementName("fefuncg", "feFuncG", (int) DispatchGroup.OTHER);
 688 | 		public static readonly ElementName FEFUNCR = new ElementName("fefuncr", "feFuncR", (int) DispatchGroup.OTHER);
 689 | 		public static readonly ElementName HANDLER = new ElementName("handler", "handler", (int) DispatchGroup.OTHER);
 690 | 		public static readonly ElementName INVERSE = new ElementName("inverse", "inverse", (int) DispatchGroup.OTHER);
 691 | 		public static readonly ElementName IMPLIES = new ElementName("implies", "implies", (int) DispatchGroup.OTHER);
 692 | 		public static readonly ElementName ISINDEX = new ElementName("isindex", "isindex", (int) DispatchGroup.ISINDEX | SPECIAL);
 693 | 		public static readonly ElementName LOGBASE = new ElementName("logbase", "logbase", (int) DispatchGroup.OTHER);
 694 | 		public static readonly ElementName LISTING = new ElementName("listing", "listing", (int) DispatchGroup.PRE_OR_LISTING | SPECIAL);
 695 | 		public static readonly ElementName MFENCED = new ElementName("mfenced", "mfenced", (int) DispatchGroup.OTHER);
 696 | 		public static readonly ElementName MPADDED = new ElementName("mpadded", "mpadded", (int) DispatchGroup.OTHER);
 697 | 		public static readonly ElementName MARQUEE = new ElementName("marquee", "marquee", (int) DispatchGroup.MARQUEE_OR_APPLET | SPECIAL | SCOPING);
 698 | 		public static readonly ElementName MACTION = new ElementName("maction", "maction", (int) DispatchGroup.OTHER);
 699 | 		public static readonly ElementName MSUBSUP = new ElementName("msubsup", "msubsup", (int) DispatchGroup.OTHER);
 700 | 		public static readonly ElementName NOEMBED = new ElementName("noembed", "noembed", (int) DispatchGroup.NOEMBED | SPECIAL);
 701 | 		public static readonly ElementName POLYGON = new ElementName("polygon", "polygon", (int) DispatchGroup.OTHER);
 702 | 		public static readonly ElementName PATTERN = new ElementName("pattern", "pattern", (int) DispatchGroup.OTHER);
 703 | 		public static readonly ElementName PRODUCT = new ElementName("product", "product", (int) DispatchGroup.OTHER);
 704 | 		public static readonly ElementName SETDIFF = new ElementName("setdiff", "setdiff", (int) DispatchGroup.OTHER);
 705 | 		public static readonly ElementName SECTION = new ElementName("section", "section", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 706 | 		public static readonly ElementName SUMMARY = new ElementName("summary", "summary", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 707 | 		public static readonly ElementName TENDSTO = new ElementName("tendsto", "tendsto", (int) DispatchGroup.OTHER);
 708 | 		public static readonly ElementName UPLIMIT = new ElementName("uplimit", "uplimit", (int) DispatchGroup.OTHER);
 709 | 		public static readonly ElementName ALTGLYPH = new ElementName("altglyph", "altGlyph", (int) DispatchGroup.OTHER); // Run of eight-letter element names. Second string is camelCase for ALTGLYPH, CLIPPATH, FEOFFSET and GLYPHREF; presumably the case-preserved SVG name - confirm against the ElementName constructor
 710 | 		public static readonly ElementName BASEFONT = new ElementName("basefont", "basefont", (int) DispatchGroup.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
 711 | 		public static readonly ElementName CLIPPATH = new ElementName("clippath", "clipPath", (int) DispatchGroup.OTHER);
 712 | 		public static readonly ElementName CODOMAIN = new ElementName("codomain", "codomain", (int) DispatchGroup.OTHER);
 713 | 		public static readonly ElementName COLGROUP = new ElementName("colgroup", "colgroup", (int) DispatchGroup.COLGROUP | SPECIAL | OPTIONAL_END_TAG);
 714 | 		public static readonly ElementName EMPTYSET = new ElementName("emptyset", "emptyset", (int) DispatchGroup.OTHER);
 715 | 		public static readonly ElementName FACTOROF = new ElementName("factorof", "factorof", (int) DispatchGroup.OTHER);
 716 | 		public static readonly ElementName FIELDSET = new ElementName("fieldset", "fieldset", (int) DispatchGroup.FIELDSET | SPECIAL);
 717 | 		public static readonly ElementName FRAMESET = new ElementName("frameset", "frameset", (int) DispatchGroup.FRAMESET | SPECIAL);
 718 | 		public static readonly ElementName FEOFFSET = new ElementName("feoffset", "feOffset", (int) DispatchGroup.OTHER);
 719 | 		public static readonly ElementName GLYPHREF = new ElementName("glyphref", "glyphRef", (int) DispatchGroup.OTHER);
 720 | 		public static readonly ElementName INTERVAL = new ElementName("interval", "interval", (int) DispatchGroup.OTHER);
 721 | 		public static readonly ElementName INTEGERS = new ElementName("integers", "integers", (int) DispatchGroup.OTHER);
 722 | 		public static readonly ElementName INFINITY = new ElementName("infinity", "infinity", (int) DispatchGroup.OTHER);
 723 | 		public static readonly ElementName LISTENER = new ElementName("listener", "listener", (int) DispatchGroup.OTHER);
 724 | 		public static readonly ElementName LOWLIMIT = new ElementName("lowlimit", "lowlimit", (int) DispatchGroup.OTHER);
 725 | 		public static readonly ElementName METADATA = new ElementName("metadata", "metadata", (int) DispatchGroup.OTHER);
 726 | 		public static readonly ElementName MENCLOSE = new ElementName("menclose", "menclose", (int) DispatchGroup.OTHER);
 727 | 		public static readonly ElementName MPHANTOM = new ElementName("mphantom", "mphantom", (int) DispatchGroup.OTHER);
 728 | 		public static readonly ElementName NOFRAMES = new ElementName("noframes", "noframes", (int) DispatchGroup.NOFRAMES | SPECIAL);
 729 | 		public static readonly ElementName NOSCRIPT = new ElementName("noscript", "noscript", (int) DispatchGroup.NOSCRIPT | SPECIAL);
 730 | 		public static readonly ElementName OPTGROUP = new ElementName("optgroup", "optgroup", (int) DispatchGroup.OPTGROUP | SPECIAL | OPTIONAL_END_TAG);
 731 | 		public static readonly ElementName POLYLINE = new ElementName("polyline", "polyline", (int) DispatchGroup.OTHER);
 732 | 		public static readonly ElementName PREFETCH = new ElementName("prefetch", "prefetch", (int) DispatchGroup.OTHER);
 733 | 		public static readonly ElementName PROGRESS = new ElementName("progress", "progress", (int) DispatchGroup.OTHER);
 734 | 		public static readonly ElementName PRSUBSET = new ElementName("prsubset", "prsubset", (int) DispatchGroup.OTHER);
 735 | 		public static readonly ElementName QUOTIENT = new ElementName("quotient", "quotient", (int) DispatchGroup.OTHER);
 736 | 		public static readonly ElementName SELECTOR = new ElementName("selector", "selector", (int) DispatchGroup.OTHER);
 737 | 		public static readonly ElementName TEXTAREA = new ElementName("textarea", "textarea", (int) DispatchGroup.TEXTAREA | SPECIAL);
 738 | 		public static readonly ElementName TEXTPATH = new ElementName("textpath", "textPath", (int) DispatchGroup.OTHER);
 739 | 		public static readonly ElementName VARIANCE = new ElementName("variance", "variance", (int) DispatchGroup.OTHER);
 740 | 		public static readonly ElementName ANIMATION = new ElementName("animation", "animation", (int) DispatchGroup.OTHER);
 741 | 		public static readonly ElementName CONJUGATE = new ElementName("conjugate", "conjugate", (int) DispatchGroup.OTHER);
 742 | 		public static readonly ElementName CONDITION = new ElementName("condition", "condition", (int) DispatchGroup.OTHER);
 743 | 		public static readonly ElementName COMPLEXES = new ElementName("complexes", "complexes", (int) DispatchGroup.OTHER);
 744 | 		public static readonly ElementName FONT_FACE = new ElementName("font-face", "font-face", (int) DispatchGroup.OTHER);
 745 | 		public static readonly ElementName FACTORIAL = new ElementName("factorial", "factorial", (int) DispatchGroup.OTHER);
 746 | 		public static readonly ElementName INTERSECT = new ElementName("intersect", "intersect", (int) DispatchGroup.OTHER);
 747 | 		public static readonly ElementName IMAGINARY = new ElementName("imaginary", "imaginary", (int) DispatchGroup.OTHER);
 748 | 		public static readonly ElementName LAPLACIAN = new ElementName("laplacian", "laplacian", (int) DispatchGroup.OTHER);
 749 | 		public static readonly ElementName MATRIXROW = new ElementName("matrixrow", "matrixrow", (int) DispatchGroup.OTHER);
 750 | 		public static readonly ElementName NOTSUBSET = new ElementName("notsubset", "notsubset", (int) DispatchGroup.OTHER);
 751 | 		public static readonly ElementName OTHERWISE = new ElementName("otherwise", "otherwise", (int) DispatchGroup.OTHER);
 752 | 		public static readonly ElementName PIECEWISE = new ElementName("piecewise", "piecewise", (int) DispatchGroup.OTHER);
 753 | 		public static readonly ElementName PLAINTEXT = new ElementName("plaintext", "plaintext", (int) DispatchGroup.PLAINTEXT | SPECIAL);
 754 | 		public static readonly ElementName RATIONALS = new ElementName("rationals", "rationals", (int) DispatchGroup.OTHER);
 755 | 		public static readonly ElementName SEMANTICS = new ElementName("semantics", "semantics", (int) DispatchGroup.OTHER);
 756 | 		public static readonly ElementName TRANSPOSE = new ElementName("transpose", "transpose", (int) DispatchGroup.OTHER);
 757 | 		public static readonly ElementName ANNOTATION = new ElementName("annotation", "annotation", (int) DispatchGroup.OTHER);
 758 | 		public static readonly ElementName BLOCKQUOTE = new ElementName("blockquote", "blockquote", (int) DispatchGroup.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
 759 | 		public static readonly ElementName DIVERGENCE = new ElementName("divergence", "divergence", (int) DispatchGroup.OTHER);
 760 | 		public static readonly ElementName EULERGAMMA = new ElementName("eulergamma", "eulergamma", (int) DispatchGroup.OTHER);
 761 | 		public static readonly ElementName EQUIVALENT = new ElementName("equivalent", "equivalent", (int) DispatchGroup.OTHER);
 762 | 		public static readonly ElementName FIGCAPTION = new ElementName("figcaption", "figcaption", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
 763 | 		public static readonly ElementName IMAGINARYI = new ElementName("imaginaryi", "imaginaryi", (int) DispatchGroup.OTHER);
 764 | 		public static readonly ElementName MALIGNMARK = new ElementName("malignmark", "malignmark", (int) DispatchGroup.MGLYPH_OR_MALIGNMARK);
 765 | 		public static readonly ElementName MUNDEROVER = new ElementName("munderover", "munderover", (int) DispatchGroup.OTHER);
 766 | 		public static readonly ElementName MLABELEDTR = new ElementName("mlabeledtr", "mlabeledtr", (int) DispatchGroup.OTHER);
 767 | 		public static readonly ElementName NOTANUMBER = new ElementName("notanumber", "notanumber", (int) DispatchGroup.OTHER);
 768 | 		public static readonly ElementName SOLIDCOLOR = new ElementName("solidcolor", "solidcolor", (int) DispatchGroup.OTHER);
 769 | 		public static readonly ElementName ALTGLYPHDEF = new ElementName("altglyphdef", "altGlyphDef", (int) DispatchGroup.OTHER);
 770 | 		public static readonly ElementName DETERMINANT = new ElementName("determinant", "determinant", (int) DispatchGroup.OTHER);
 771 | 		public static readonly ElementName FEMERGENODE = new ElementName("femergenode", "feMergeNode", (int) DispatchGroup.OTHER);
 772 | 		public static readonly ElementName FECOMPOSITE = new ElementName("fecomposite", "feComposite", (int) DispatchGroup.OTHER);
 773 | 		public static readonly ElementName FESPOTLIGHT = new ElementName("fespotlight", "feSpotLight", (int) DispatchGroup.OTHER);
 774 | 		public static readonly ElementName MALIGNGROUP = new ElementName("maligngroup", "maligngroup", (int) DispatchGroup.OTHER);
 775 | 		public static readonly ElementName MPRESCRIPTS = new ElementName("mprescripts", "mprescripts", (int) DispatchGroup.OTHER);
 776 | 		public static readonly ElementName MOMENTABOUT = new ElementName("momentabout", "momentabout", (int) DispatchGroup.OTHER);
 777 | 		public static readonly ElementName NOTPRSUBSET = new ElementName("notprsubset", "notprsubset", (int) DispatchGroup.OTHER);
 778 | 		public static readonly ElementName PARTIALDIFF = new ElementName("partialdiff", "partialdiff", (int) DispatchGroup.OTHER);
 779 | 		public static readonly ElementName ALTGLYPHITEM = new ElementName("altglyphitem", "altGlyphItem", (int) DispatchGroup.OTHER);
 780 | 		public static readonly ElementName ANIMATECOLOR = new ElementName("animatecolor", "animateColor", (int) DispatchGroup.OTHER);
 781 | 		public static readonly ElementName DATATEMPLATE = new ElementName("datatemplate", "datatemplate", (int) DispatchGroup.OTHER);
 782 | 		public static readonly ElementName EXPONENTIALE = new ElementName("exponentiale", "exponentiale", (int) DispatchGroup.OTHER);
 783 | 		public static readonly ElementName FETURBULENCE = new ElementName("feturbulence", "feTurbulence", (int) DispatchGroup.OTHER);
 784 | 		public static readonly ElementName FEPOINTLIGHT = new ElementName("fepointlight", "fePointLight", (int) DispatchGroup.OTHER);
 785 | 		public static readonly ElementName FEMORPHOLOGY = new ElementName("femorphology", "feMorphology", (int) DispatchGroup.OTHER);
 786 | 		public static readonly ElementName OUTERPRODUCT = new ElementName("outerproduct", "outerproduct", (int) DispatchGroup.OTHER);
 787 | 		public static readonly ElementName ANIMATEMOTION = new ElementName("animatemotion", "animateMotion", (int) DispatchGroup.OTHER);
 788 | 		public static readonly ElementName COLOR_PROFILE = new ElementName("color-profile", "color-profile", (int) DispatchGroup.OTHER);
 789 | 		public static readonly ElementName FONT_FACE_SRC = new ElementName("font-face-src", "font-face-src", (int) DispatchGroup.OTHER);
 790 | 		public static readonly ElementName FONT_FACE_URI = new ElementName("font-face-uri", "font-face-uri", (int) DispatchGroup.OTHER);
 791 | 		public static readonly ElementName FOREIGNOBJECT = new ElementName("foreignobject", "foreignObject", (int) DispatchGroup.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG);
 792 | 		public static readonly ElementName FECOLORMATRIX = new ElementName("fecolormatrix", "feColorMatrix", (int) DispatchGroup.OTHER);
 793 | 		public static readonly ElementName MISSING_GLYPH = new ElementName("missing-glyph", "missing-glyph", (int) DispatchGroup.OTHER);
 794 | 		public static readonly ElementName MMULTISCRIPTS = new ElementName("mmultiscripts", "mmultiscripts", (int) DispatchGroup.OTHER);
 795 | 		public static readonly ElementName SCALARPRODUCT = new ElementName("scalarproduct", "scalarproduct", (int) DispatchGroup.OTHER);
 796 | 		public static readonly ElementName VECTORPRODUCT = new ElementName("vectorproduct", "vectorproduct", (int) DispatchGroup.OTHER);
 797 | 		public static readonly ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", (int) DispatchGroup.ANNOTATION_XML | SCOPING_AS_MATHML);
 798 | 		public static readonly ElementName DEFINITION_SRC = new ElementName("definition-src", "definition-src", (int) DispatchGroup.OTHER);
 799 | 		public static readonly ElementName FONT_FACE_NAME = new ElementName("font-face-name", "font-face-name", (int) DispatchGroup.OTHER);
 800 | 		public static readonly ElementName FEGAUSSIANBLUR = new ElementName("fegaussianblur", "feGaussianBlur", (int) DispatchGroup.OTHER);
 801 | 		public static readonly ElementName FEDISTANTLIGHT = new ElementName("fedistantlight", "feDistantLight", (int) DispatchGroup.OTHER);
 802 | 		public static readonly ElementName LINEARGRADIENT = new ElementName("lineargradient", "linearGradient", (int) DispatchGroup.OTHER);
 803 | 		public static readonly ElementName NATURALNUMBERS = new ElementName("naturalnumbers", "naturalnumbers", (int) DispatchGroup.OTHER);
 804 | 		public static readonly ElementName RADIALGRADIENT = new ElementName("radialgradient", "radialGradient", (int) DispatchGroup.OTHER);
 805 | 		public static readonly ElementName ANIMATETRANSFORM = new ElementName("animatetransform", "animateTransform", (int) DispatchGroup.OTHER);
 806 | 		public static readonly ElementName CARTESIANPRODUCT = new ElementName("cartesianproduct", "cartesianproduct", (int) DispatchGroup.OTHER);
 807 | 		public static readonly ElementName FONT_FACE_FORMAT = new ElementName("font-face-format", "font-face-format", (int) DispatchGroup.OTHER);
 808 | 		public static readonly ElementName FECONVOLVEMATRIX = new ElementName("feconvolvematrix", "feConvolveMatrix", (int) DispatchGroup.OTHER);
 809 | 		public static readonly ElementName FEDIFFUSELIGHTING = new ElementName("fediffuselighting", "feDiffuseLighting", (int) DispatchGroup.OTHER);
 810 | 		public static readonly ElementName FEDISPLACEMENTMAP = new ElementName("fedisplacementmap", "feDisplacementMap", (int) DispatchGroup.OTHER);
 811 | 		public static readonly ElementName FESPECULARLIGHTING = new ElementName("fespecularlighting", "feSpecularLighting", (int) DispatchGroup.OTHER);
 812 | 		public static readonly ElementName DOMAINOFAPPLICATION = new ElementName("domainofapplication", "domainofapplication", (int) DispatchGroup.OTHER);
 813 | 		public static readonly ElementName FECOMPONENTTRANSFER = new ElementName("fecomponenttransfer", "feComponentTransfer", (int) DispatchGroup.OTHER);
 814 | 		private static readonly ElementName[] ELEMENT_NAMES = { // table of ElementName constants declared in this class; same entry count as ELEMENT_HASHES, presumably index-aligned with it (TODO confirm against the lookup code), so do not reorder
 815 | 	A,
 816 | 	B,
 817 | 	G,
 818 | 	I,
 819 | 	P,
 820 | 	Q,
 821 | 	S,
 822 | 	U,
 823 | 	BR,
 824 | 	CI,
 825 | 	CN,
 826 | 	DD,
 827 | 	DL,
 828 | 	DT,
 829 | 	EM,
 830 | 	EQ,
 831 | 	FN,
 832 | 	H1,
 833 | 	H2,
 834 | 	H3,
 835 | 	H4,
 836 | 	H5,
 837 | 	H6,
 838 | 	GT,
 839 | 	HR,
 840 | 	IN,
 841 | 	LI,
 842 | 	LN,
 843 | 	LT,
 844 | 	MI,
 845 | 	MN,
 846 | 	MO,
 847 | 	MS,
 848 | 	OL,
 849 | 	OR,
 850 | 	PI,
 851 | 	RP,
 852 | 	RT,
 853 | 	TD,
 854 | 	TH,
 855 | 	TR,
 856 | 	TT,
 857 | 	UL,
 858 | 	AND,
 859 | 	ARG,
 860 | 	ABS,
 861 | 	BIG,
 862 | 	BDO,
 863 | 	CSC,
 864 | 	COL,
 865 | 	COS,
 866 | 	COT,
 867 | 	DEL,
 868 | 	DFN,
 869 | 	DIR,
 870 | 	DIV,
 871 | 	EXP,
 872 | 	GCD,
 873 | 	GEQ,
 874 | 	IMG,
 875 | 	INS,
 876 | 	INT,
 877 | 	KBD,
 878 | 	LOG,
 879 | 	LCM,
 880 | 	LEQ,
 881 | 	MTD,
 882 | 	MIN,
 883 | 	MAP,
 884 | 	MTR,
 885 | 	MAX,
 886 | 	NEQ,
 887 | 	NOT,
 888 | 	NAV,
 889 | 	PRE,
 890 | 	REM,
 891 | 	SUB,
 892 | 	SEC,
 893 | 	SVG,
 894 | 	SUM,
 895 | 	SIN,
 896 | 	SEP,
 897 | 	SUP,
 898 | 	SET,
 899 | 	TAN,
 900 | 	USE,
 901 | 	VAR,
 902 | 	WBR,
 903 | 	XMP,
 904 | 	XOR,
 905 | 	AREA,
 906 | 	ABBR,
 907 | 	BASE,
 908 | 	BVAR,
 909 | 	BODY,
 910 | 	CARD,
 911 | 	CODE,
 912 | 	CITE,
 913 | 	CSCH,
 914 | 	COSH,
 915 | 	COTH,
 916 | 	CURL,
 917 | 	DESC,
 918 | 	DIFF,
 919 | 	DEFS,
 920 | 	FORM,
 921 | 	FONT,
 922 | 	GRAD,
 923 | 	HEAD,
 924 | 	HTML,
 925 | 	LINE,
 926 | 	LINK,
 927 | 	LIST,
 928 | 	META,
 929 | 	MSUB,
 930 | 	MODE,
 931 | 	MATH,
 932 | 	MARK,
 933 | 	MASK,
 934 | 	MEAN,
 935 | 	MSUP,
 936 | 	MENU,
 937 | 	MROW,
 938 | 	NONE,
 939 | 	NOBR,
 940 | 	NEST,
 941 | 	PATH,
 942 | 	PLUS,
 943 | 	RULE,
 944 | 	REAL,
 945 | 	RELN,
 946 | 	RECT,
 947 | 	ROOT,
 948 | 	RUBY,
 949 | 	SECH,
 950 | 	SINH,
 951 | 	SPAN,
 952 | 	SAMP,
 953 | 	STOP,
 954 | 	SDEV,
 955 | 	TIME,
 956 | 	TRUE,
 957 | 	TREF,
 958 | 	TANH,
 959 | 	TEXT,
 960 | 	VIEW,
 961 | 	ASIDE,
 962 | 	AUDIO,
 963 | 	APPLY,
 964 | 	EMBED,
 965 | 	FRAME,
 966 | 	FALSE,
 967 | 	FLOOR,
 968 | 	GLYPH,
 969 | 	HKERN,
 970 | 	IMAGE,
 971 | 	IDENT,
 972 | 	INPUT,
 973 | 	LABEL,
 974 | 	LIMIT,
 975 | 	MFRAC,
 976 | 	MPATH,
 977 | 	METER,
 978 | 	MOVER,
 979 | 	MINUS,
 980 | 	MROOT,
 981 | 	MSQRT,
 982 | 	MTEXT,
 983 | 	NOTIN,
 984 | 	PIECE,
 985 | 	PARAM,
 986 | 	POWER,
 987 | 	REALS,
 988 | 	STYLE,
 989 | 	SMALL,
 990 | 	THEAD,
 991 | 	TABLE,
 992 | 	TITLE,
 993 | 	TRACK,
 994 | 	TSPAN,
 995 | 	TIMES,
 996 | 	TFOOT,
 997 | 	TBODY,
 998 | 	UNION,
 999 | 	VKERN,
1000 | 	VIDEO,
1001 | 	ARCSEC,
1002 | 	ARCCSC,
1003 | 	ARCTAN,
1004 | 	ARCSIN,
1005 | 	ARCCOS,
1006 | 	APPLET,
1007 | 	ARCCOT,
1008 | 	APPROX,
1009 | 	BUTTON,
1010 | 	CIRCLE,
1011 | 	CENTER,
1012 | 	CURSOR,
1013 | 	CANVAS,
1014 | 	DIVIDE,
1015 | 	DEGREE,
1016 | 	DOMAIN,
1017 | 	EXISTS,
1018 | 	FETILE,
1019 | 	FIGURE,
1020 | 	FORALL,
1021 | 	FILTER,
1022 | 	FOOTER,
1023 | 	HGROUP,
1024 | 	HEADER,
1025 | 	IFRAME,
1026 | 	KEYGEN,
1027 | 	LAMBDA,
1028 | 	LEGEND,
1029 | 	MSPACE,
1030 | 	MTABLE,
1031 | 	MSTYLE,
1032 | 	MGLYPH,
1033 | 	MEDIAN,
1034 | 	MUNDER,
1035 | 	MARKER,
1036 | 	MERROR,
1037 | 	MOMENT,
1038 | 	MATRIX,
1039 | 	OPTION,
1040 | 	OBJECT,
1041 | 	OUTPUT,
1042 | 	PRIMES,
1043 | 	SOURCE,
1044 | 	STRIKE,
1045 | 	STRONG,
1046 | 	SWITCH,
1047 | 	SYMBOL,
1048 | 	SELECT,
1049 | 	SUBSET,
1050 | 	SCRIPT,
1051 | 	TBREAK,
1052 | 	VECTOR,
1053 | 	ARTICLE,
1054 | 	ANIMATE,
1055 | 	ARCSECH,
1056 | 	ARCCSCH,
1057 | 	ARCTANH,
1058 | 	ARCSINH,
1059 | 	ARCCOSH,
1060 | 	ARCCOTH,
1061 | 	ACRONYM,
1062 | 	ADDRESS,
1063 | 	BGSOUND,
1064 | 	COMMAND,
1065 | 	COMPOSE,
1066 | 	CEILING,
1067 | 	CSYMBOL,
1068 | 	CAPTION,
1069 | 	DISCARD,
1070 | 	DECLARE,
1071 | 	DETAILS,
1072 | 	ELLIPSE,
1073 | 	FEFUNCA,
1074 | 	FEFUNCB,
1075 | 	FEBLEND,
1076 | 	FEFLOOD,
1077 | 	FEIMAGE,
1078 | 	FEMERGE,
1079 | 	FEFUNCG,
1080 | 	FEFUNCR,
1081 | 	HANDLER,
1082 | 	INVERSE,
1083 | 	IMPLIES,
1084 | 	ISINDEX,
1085 | 	LOGBASE,
1086 | 	LISTING,
1087 | 	MFENCED,
1088 | 	MPADDED,
1089 | 	MARQUEE,
1090 | 	MACTION,
1091 | 	MSUBSUP,
1092 | 	NOEMBED,
1093 | 	POLYGON,
1094 | 	PATTERN,
1095 | 	PRODUCT,
1096 | 	SETDIFF,
1097 | 	SECTION,
1098 | 	SUMMARY,
1099 | 	TENDSTO,
1100 | 	UPLIMIT,
1101 | 	ALTGLYPH,
1102 | 	BASEFONT,
1103 | 	CLIPPATH,
1104 | 	CODOMAIN,
1105 | 	COLGROUP,
1106 | 	EMPTYSET,
1107 | 	FACTOROF,
1108 | 	FIELDSET,
1109 | 	FRAMESET,
1110 | 	FEOFFSET,
1111 | 	GLYPHREF,
1112 | 	INTERVAL,
1113 | 	INTEGERS,
1114 | 	INFINITY,
1115 | 	LISTENER,
1116 | 	LOWLIMIT,
1117 | 	METADATA,
1118 | 	MENCLOSE,
1119 | 	MPHANTOM,
1120 | 	NOFRAMES,
1121 | 	NOSCRIPT,
1122 | 	OPTGROUP,
1123 | 	POLYLINE,
1124 | 	PREFETCH,
1125 | 	PROGRESS,
1126 | 	PRSUBSET,
1127 | 	QUOTIENT,
1128 | 	SELECTOR,
1129 | 	TEXTAREA,
1130 | 	TEXTPATH,
1131 | 	VARIANCE,
1132 | 	ANIMATION,
1133 | 	CONJUGATE,
1134 | 	CONDITION,
1135 | 	COMPLEXES,
1136 | 	FONT_FACE,
1137 | 	FACTORIAL,
1138 | 	INTERSECT,
1139 | 	IMAGINARY,
1140 | 	LAPLACIAN,
1141 | 	MATRIXROW,
1142 | 	NOTSUBSET,
1143 | 	OTHERWISE,
1144 | 	PIECEWISE,
1145 | 	PLAINTEXT,
1146 | 	RATIONALS,
1147 | 	SEMANTICS,
1148 | 	TRANSPOSE,
1149 | 	ANNOTATION,
1150 | 	BLOCKQUOTE,
1151 | 	DIVERGENCE,
1152 | 	EULERGAMMA,
1153 | 	EQUIVALENT,
1154 | 	FIGCAPTION,
1155 | 	IMAGINARYI,
1156 | 	MALIGNMARK,
1157 | 	MUNDEROVER,
1158 | 	MLABELEDTR,
1159 | 	NOTANUMBER,
1160 | 	SOLIDCOLOR,
1161 | 	ALTGLYPHDEF,
1162 | 	DETERMINANT,
1163 | 	FEMERGENODE,
1164 | 	FECOMPOSITE,
1165 | 	FESPOTLIGHT,
1166 | 	MALIGNGROUP,
1167 | 	MPRESCRIPTS,
1168 | 	MOMENTABOUT,
1169 | 	NOTPRSUBSET,
1170 | 	PARTIALDIFF,
1171 | 	ALTGLYPHITEM,
1172 | 	ANIMATECOLOR,
1173 | 	DATATEMPLATE,
1174 | 	EXPONENTIALE,
1175 | 	FETURBULENCE,
1176 | 	FEPOINTLIGHT,
1177 | 	FEMORPHOLOGY,
1178 | 	OUTERPRODUCT,
1179 | 	ANIMATEMOTION,
1180 | 	COLOR_PROFILE,
1181 | 	FONT_FACE_SRC,
1182 | 	FONT_FACE_URI,
1183 | 	FOREIGNOBJECT,
1184 | 	FECOLORMATRIX,
1185 | 	MISSING_GLYPH,
1186 | 	MMULTISCRIPTS,
1187 | 	SCALARPRODUCT,
1188 | 	VECTORPRODUCT,
1189 | 	ANNOTATION_XML,
1190 | 	DEFINITION_SRC,
1191 | 	FONT_FACE_NAME,
1192 | 	FEGAUSSIANBLUR,
1193 | 	FEDISTANTLIGHT,
1194 | 	LINEARGRADIENT,
1195 | 	NATURALNUMBERS,
1196 | 	RADIALGRADIENT,
1197 | 	ANIMATETRANSFORM,
1198 | 	CARTESIANPRODUCT,
1199 | 	FONT_FACE_FORMAT,
1200 | 	FECONVOLVEMATRIX,
1201 | 	FEDIFFUSELIGHTING,
1202 | 	FEDISPLACEMENTMAP,
1203 | 	FESPECULARLIGHTING,
1204 | 	DOMAINOFAPPLICATION,
1205 | 	FECOMPONENTTRANSFER,
1206 | 	};
1207 | 		private static readonly int[] ELEMENT_HASHES = { // integer hashes listed in ascending order, one per ELEMENT_NAMES entry by count; presumably ELEMENT_HASHES[i] is the hash of ELEMENT_NAMES[i] (TODO confirm against the lookup code)
1208 | 	1057,
1209 | 	1090,
1210 | 	1255,
1211 | 	1321,
1212 | 	1552,
1213 | 	1585,
1214 | 	1651,
1215 | 	1717,
1216 | 	68162,
1217 | 	68899,
1218 | 	69059,
1219 | 	69764,
1220 | 	70020,
1221 | 	70276,
1222 | 	71077,
1223 | 	71205,
1224 | 	72134,
1225 | 	72232,
1226 | 	72264,
1227 | 	72296,
1228 | 	72328,
1229 | 	72360,
1230 | 	72392,
1231 | 	73351,
1232 | 	74312,
1233 | 	75209,
1234 | 	78124,
1235 | 	78284,
1236 | 	78476,
1237 | 	79149,
1238 | 	79309,
1239 | 	79341,
1240 | 	79469,
1241 | 	81295,
1242 | 	81487,
1243 | 	82224,
1244 | 	84498,
1245 | 	84626,
1246 | 	86164,
1247 | 	86292,
1248 | 	86612,
1249 | 	86676,
1250 | 	87445,
1251 | 	3183041,
1252 | 	3186241,
1253 | 	3198017,
1254 | 	3218722,
1255 | 	3226754,
1256 | 	3247715,
1257 | 	3256803,
1258 | 	3263971,
1259 | 	3264995,
1260 | 	3289252,
1261 | 	3291332,
1262 | 	3295524,
1263 | 	3299620,
1264 | 	3326725,
1265 | 	3379303,
1266 | 	3392679,
1267 | 	3448233,
1268 | 	3460553,
1269 | 	3461577,
1270 | 	3510347,
1271 | 	3546604,
1272 | 	3552364,
1273 | 	3556524,
1274 | 	3576461,
1275 | 	3586349,
1276 | 	3588141,
1277 | 	3590797,
1278 | 	3596333,
1279 | 	3622062,
1280 | 	3625454,
1281 | 	3627054,
1282 | 	3675728,
1283 | 	3749042,
1284 | 	3771059,
1285 | 	3771571,
1286 | 	3776211,
1287 | 	3782323,
1288 | 	3782963,
1289 | 	3784883,
1290 | 	3785395,
1291 | 	3788979,
1292 | 	3815476,
1293 | 	3839605,
1294 | 	3885110,
1295 | 	3917911,
1296 | 	3948984,
1297 | 	3951096,
1298 | 	135304769,
1299 | 	135858241,
1300 | 	136498210,
1301 | 	136906434,
1302 | 	137138658,
1303 | 	137512995,
1304 | 	137531875,
1305 | 	137548067,
1306 | 	137629283,
1307 | 	137645539,
1308 | 	137646563,
1309 | 	137775779,
1310 | 	138529956,
1311 | 	138615076,
1312 | 	139040932,
1313 | 	140954086,
1314 | 	141179366,
1315 | 	141690439,
1316 | 	142738600,
1317 | 	143013512,
1318 | 	146979116,
1319 | 	147175724,
1320 | 	147475756,
1321 | 	147902637,
1322 | 	147936877,
1323 | 	148017645,
1324 | 	148131885,
1325 | 	148228141,
1326 | 	148229165,
1327 | 	148309165,
1328 | 	148395629,
1329 | 	148551853,
1330 | 	148618829,
1331 | 	149076462,
1332 | 	149490158,
1333 | 	149572782,
1334 | 	151277616,
1335 | 	151639440,
1336 | 	153268914,
1337 | 	153486514,
1338 | 	153563314,
1339 | 	153750706,
1340 | 	153763314,
1341 | 	153914034,
1342 | 	154406067,
1343 | 	154417459,
1344 | 	154600979,
1345 | 	154678323,
1346 | 	154680979,
1347 | 	154866835,
1348 | 	155366708,
1349 | 	155375188,
1350 | 	155391572,
1351 | 	155465780,
1352 | 	155869364,
1353 | 	158045494,
1354 | 	168988979,
1355 | 	169321621,
1356 | 	169652752,
1357 | 	173151309,
1358 | 	174240818,
1359 | 	174247297,
1360 | 	174669292,
1361 | 	175391532,
1362 | 	176638123,
1363 | 	177380397,
1364 | 	177879204,
1365 | 	177886734,
1366 | 	180753473,
1367 | 	181020073,
1368 | 	181503558,
1369 | 	181686320,
1370 | 	181999237,
1371 | 	181999311,
1372 | 	182048201,
1373 | 	182074866,
1374 | 	182078003,
1375 | 	182083764,
1376 | 	182920847,
1377 | 	184716457,
1378 | 	184976961,
1379 | 	185145071,
1380 | 	187281445,
1381 | 	187872052,
1382 | 	188100653,
1383 | 	188875944,
1384 | 	188919873,
1385 | 	188920457,
1386 | 	189107250,
1387 | 	189203987,
1388 | 	189371817,
1389 | 	189414886,
1390 | 	189567458,
1391 | 	190266670,
1392 | 	191318187,
1393 | 	191337609,
1394 | 	202479203,
1395 | 	202493027,
1396 | 	202835587,
1397 | 	202843747,
1398 | 	203013219,
1399 | 	203036048,
1400 | 	203045987,
1401 | 	203177552,
1402 | 	203898516,
1403 | 	204648562,
1404 | 	205067918,
1405 | 	205078130,
1406 | 	205096654,
1407 | 	205689142,
1408 | 	205690439,
1409 | 	205988909,
1410 | 	207213161,
1411 | 	207794484,
1412 | 	207800999,
1413 | 	208023602,
1414 | 	208213644,
1415 | 	208213647,
1416 | 	210261490,
1417 | 	210310273,
1418 | 	210940978,
1419 | 	213325049,
1420 | 	213946445,
1421 | 	214055079,
1422 | 	215125040,
1423 | 	215134273,
1424 | 	215135028,
1425 | 	215237420,
1426 | 	215418148,
1427 | 	215553166,
1428 | 	215553394,
1429 | 	215563858,
1430 | 	215627949,
1431 | 	215754324,
1432 | 	217529652,
1433 | 	217713834,
1434 | 	217732628,
1435 | 	218731945,
1436 | 	221417045,
1437 | 	221424946,
1438 | 	221493746,
1439 | 	221515401,
1440 | 	221658189,
1441 | 	221908140,
1442 | 	221910626,
1443 | 	221921586,
1444 | 	222659762,
1445 | 	225001091,
1446 | 	236105833,
1447 | 	236113965,
1448 | 	236194995,
1449 | 	236195427,
1450 | 	236206132,
1451 | 	236206387,
1452 | 	236211683,
1453 | 	236212707,
1454 | 	236381647,
1455 | 	236571826,
1456 | 	237124271,
1457 | 	238172205,
1458 | 	238210544,
1459 | 	238270764,
1460 | 	238435405,
1461 | 	238501172,
1462 | 	239224867,
1463 | 	239257644,
1464 | 	239710497,
1465 | 	240307721,
1466 | 	241208789,
1467 | 	241241557,
1468 | 	241318060,
1469 | 	241319404,
1470 | 	241343533,
1471 | 	241344069,
1472 | 	241405397,
1473 | 	241765845,
1474 | 	243864964,
1475 | 	244502085,
1476 | 	244946220,
1477 | 	245109902,
1478 | 	247647266,
1479 | 	247707956,
1480 | 	248648814,
1481 | 	248648836,
1482 | 	248682161,
1483 | 	248986932,
1484 | 	249058914,
1485 | 	249697357,
1486 | 	252132601,
1487 | 	252135604,
1488 | 	252317348,
1489 | 	255007012,
1490 | 	255278388,
1491 | 	255641645,
1492 | 	256365156,
1493 | 	257566121,
1494 | 	269763372,
1495 | 	271202790,
1496 | 	271863856,
1497 | 	272049197,
1498 | 	272127474,
1499 | 	274339449,
1500 | 	274939471,
1501 | 	275388004,
1502 | 	275388005,
1503 | 	275388006,
1504 | 	275977800,
1505 | 	278267602,
1506 | 	278513831,
1507 | 	278712622,
1508 | 	281613765,
1509 | 	281683369,
1510 | 	282120228,
1511 | 	282250732,
1512 | 	282508942,
1513 | 	283743649,
1514 | 	283787570,
1515 | 	284710386,
1516 | 	285391148,
1517 | 	285478533,
1518 | 	285854898,
1519 | 	285873762,
1520 | 	286931113,
1521 | 	288964227,
1522 | 	289445441,
1523 | 	289689648,
1524 | 	291671489,
1525 | 	303512884,
1526 | 	305319975,
1527 | 	305610036,
1528 | 	305764101,
1529 | 	308448294,
1530 | 	308675890,
1531 | 	312085683,
1532 | 	312264750,
1533 | 	315032867,
1534 | 	316391000,
1535 | 	317331042,
1536 | 	317902135,
1537 | 	318950711,
1538 | 	319447220,
1539 | 	321499182,
1540 | 	322538804,
1541 | 	323145200,
1542 | 	337067316,
1543 | 	337826293,
1544 | 	339905989,
1545 | 	340833697,
1546 | 	341457068,
1547 | 	342310196,
1548 | 	345302593,
1549 | 	349554733,
1550 | 	349771471,
1551 | 	349786245,
1552 | 	350819405,
1553 | 	356072847,
1554 | 	370349192,
1555 | 	373962798,
1556 | 	375558638,
1557 | 	375574835,
1558 | 	376053993,
1559 | 	383276530,
1560 | 	383373833,
1561 | 	383407586,
1562 | 	384439906,
1563 | 	386079012,
1564 | 	404133513,
1565 | 	404307343,
1566 | 	407031852,
1567 | 	408072233,
1568 | 	409112005,
1569 | 	409608425,
1570 | 	409771500,
1571 | 	419040932,
1572 | 	437730612,
1573 | 	439529766,
1574 | 	442616365,
1575 | 	442813037,
1576 | 	443157674,
1577 | 	443295316,
1578 | 	450118444,
1579 | 	450482697,
1580 | 	456789668,
1581 | 	459935396,
1582 | 	471217869,
1583 | 	474073645,
1584 | 	476230702,
1585 | 	476665218,
1586 | 	476717289,
1587 | 	483014825,
1588 | 	485083298,
1589 | 	489306281,
1590 | 	538364390,
1591 | 	540675748,
1592 | 	543819186,
1593 | 	543958612,
1594 | 	576960820,
1595 | 	577242548,
1596 | 	610515252,
1597 | 	642202932,
1598 | 	644420819,
1599 | 	};
1600 | 	}
1601 | 
1602 | }
1603 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/HtmlAttributes.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2007 Henri Sivonen
  3 |  * Copyright (c) 2008-2011 Mozilla Foundation
  4 |  * Copyright (c) 2012 Patrick Reisert
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  7 |  * copy of this software and associated documentation files (the "Software"), 
  8 |  * to deal in the Software without restriction, including without limitation 
  9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 10 |  * and/or sell copies of the Software, and to permit persons to whom the 
 11 |  * Software is furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in 
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 22 |  * DEALINGS IN THE SOFTWARE.
 23 |  */
 24 | 
 25 | using System;
 26 | using System.Diagnostics;
 27 | using HtmlParserSharp.Common;
 28 | 
 29 | namespace HtmlParserSharp.Core
 30 | {
 31 | 	/// <summary>
 32 | 	/// Be careful with this class. QName is the name in from HTML tokenization.
 33 | 	/// Otherwise, please refer to the interface doc.
 34 | 	/// </summary>
 35 | 	public sealed class HtmlAttributes : IEquatable<HtmlAttributes> /* : Sax.IAttributes*/ {
 36 | 
 37 | 		// [NOCPP[
 38 | 
 39 | 		private static readonly AttributeName[] EMPTY_ATTRIBUTENAMES = new AttributeName[0]; // shared empty array; the constructor assigns it to xmlnsNames
 40 | 
 41 | 		private static readonly string[] EMPTY_stringS = new string[0]; // shared empty array; the constructor assigns it to xmlnsValues
 42 | 
 43 | 		// ]NOCPP]
 44 | 
 45 | 		public static readonly HtmlAttributes EMPTY_ATTRIBUTES = new HtmlAttributes(AttributeName.HTML); // keep below the two empty arrays: static initializers run in textual order and the constructor reads both
 46 | 
 47 | 		private int mode; // passed to AttributeName.GetLocal/GetQName/GetUri whenever a name is resolved
 48 | 
 49 | 		private int length; // number of used slots in names; exposed through the Length property
 50 | 
 51 | 		private AttributeName[] names; // attribute names; the lookups only consult indices below length
 52 | 
 53 | 		private string[] values; // allocated with the same capacity as names in the constructor
 54 | 
 55 | 		// [NOCPP[
 56 | 
 57 | 		private string idValue; // set to null by the constructor
 58 | 
 59 | 		private int xmlnsLength; // set to 0 by the constructor
 60 | 
 61 | 		private AttributeName[] xmlnsNames; // starts out as EMPTY_ATTRIBUTENAMES
 62 | 
 63 | 		private string[] xmlnsValues; // starts out as EMPTY_stringS
 64 | 
 65 | 		// ]NOCPP]
 66 | 
 67 | 		/// <summary>
 68 | 		/// Creates an empty attribute holder for the given mode.
 69 | 		/// </summary>
 70 | 		/// <param name="mode">Stored and later handed to the <c>AttributeName</c> name accessors.</param>
 71 | 		public HtmlAttributes(int mode)
 72 | 		{
 73 | 			// An initial capacity of 5 covers 98.3% of elements according to Hixie.
 74 | 			const int initialCapacity = 5;
 75 | 
 76 | 			this.mode = mode;
 77 | 			length = 0;
 78 | 			names = new AttributeName[initialCapacity];
 79 | 			values = new string[initialCapacity];
 80 | 
 81 | 			// [NOCPP[
 82 | 
 83 | 			idValue = null;
 84 | 			xmlnsLength = 0;
 85 | 			xmlnsNames = EMPTY_ATTRIBUTENAMES;
 86 | 			xmlnsValues = EMPTY_stringS;
 87 | 
 88 | 			// ]NOCPP]
 89 | 		}
 90 | 		/*
 91 | 		public HtmlAttributes(HtmlAttributes other) {
 92 | 			this.mode = other.mode;
 93 | 			this.length = other.length;
 94 | 			this.names = new AttributeName[other.length];
 95 | 			this.values = new string[other.length];
 96 | 			// [NOCPP[
 97 | 			this.idValue = other.idValue;
 98 | 			this.xmlnsLength = other.xmlnsLength;
 99 | 			this.xmlnsNames = new AttributeName[other.xmlnsLength];
100 | 			this.xmlnsValues = new string[other.xmlnsLength];
101 | 			// ]NOCPP]
102 | 		}
103 | 		*/
104 | 
105 | 		/// <summary>
106 | 		/// Finds the position of an attribute by comparing stored names with <c>==</c>.
107 | 		/// Only use with a static argument.
108 | 		/// </summary>
109 | 		/// <returns>The index of the first match, or -1 when there is none.</returns>
110 | 		public int GetIndex(AttributeName name)
111 | 		{
112 | 			int position = 0;
113 | 			while (position < length && !(names[position] == name))
114 | 			{
115 | 				position++;
116 | 			}
117 | 			return position < length ? position : -1;
118 | 		}
119 | 
120 | 		// [NOCPP[
121 | 
122 | 		/// <summary>Finds the first attribute whose qualified name equals <paramref name="qName"/>.</summary>
123 | 		/// <returns>The matching index, or -1 when no attribute matches.</returns>
124 | 		public int GetIndex(string qName)
125 | 		{
126 | 			int position = 0;
127 | 			while (position < length && !(names[position].GetQName(mode) == qName))
128 | 			{
129 | 				position++;
130 | 			}
131 | 			return position < length ? position : -1;
132 | 		}
133 | 
134 | 		/// <summary>Finds the first attribute matching both the namespace URI and the local name.</summary>
135 | 		/// <returns>The matching index, or -1 when no attribute matches.</returns>
136 | 		public int GetIndex(string uri, string localName)
137 | 		{
138 | 			int position = -1;
139 | 			for (int candidate = 0; candidate < length && position < 0; candidate++)
140 | 			{
141 | 				bool matches = names[candidate].GetLocal(mode) == localName && names[candidate].GetUri(mode) == uri;
142 | 				position = matches ? candidate : -1;
143 | 			}
144 | 			return position;
145 | 		}
146 | 
147 | 		/// <summary>
148 | 		/// Looks up the attribute by qualified name and reports its type.
149 | 		/// </summary>
150 | 		/// <returns>
151 | 		/// The result of <c>GetType(int)</c> for the matching index, or
152 | 		/// <c>null</c> when no attribute has that qualified name.
153 | 		/// </returns>
154 | 		public string GetType(string qName)
155 | 		{
156 | 			int position = GetIndex(qName);
157 | 			return position == -1 ? null : GetType(position);
158 | 		}
159 | 
160 | 		/// <summary>
161 | 		/// Looks up the attribute by namespace URI and local name and reports its type.
162 | 		/// </summary>
163 | 		/// <returns>
164 | 		/// The result of <c>GetType(int)</c> for the matching index, or
165 | 		/// <c>null</c> when no attribute matches.
166 | 		/// </returns>
167 | 		public string GetType(string uri, string localName)
168 | 		{
169 | 			int position = GetIndex(uri, localName);
170 | 			return position == -1 ? null : GetType(position);
171 | 		}
172 | 
173 | 		/// <summary>
174 | 		/// Looks up an attribute value by qualified name.
175 | 		/// </summary>
176 | 		/// <returns>
177 | 		/// The stored value, or <c>null</c> when no attribute has that
178 | 		/// qualified name.
179 | 		/// </returns>
180 | 		public string GetValue(string qName)
181 | 		{
182 | 			int position = GetIndex(qName);
183 | 			return position == -1 ? null : GetValue(position);
184 | 		}
185 | 
186 | 		/// <summary>
187 | 		/// Looks up an attribute value by namespace URI and local name.
188 | 		/// </summary>
189 | 		/// <returns>
190 | 		/// The stored value, or <c>null</c> when no attribute matches.
191 | 		/// </returns>
192 | 		public string GetValue(string uri, string localName)
193 | 		{
194 | 			int position = GetIndex(uri, localName);
195 | 			if (position == -1) { return null; }
196 | 			return GetValue(position);
197 | 		}
198 | 
199 | 		// ]NOCPP]
200 | 
201 | 		/// <summary>
202 | 		/// Number of entries currently held in the name/value arrays.
203 | 		/// </summary>
204 | 		public int Length
205 | 		{
206 | 			get { return this.length; }
207 | 		}
208 | 
209 | 		/// <summary>Local name of the attribute at <paramref name="index"/>.</summary>
210 | 		/// <returns>The local name for the current mode, or <c>null</c> if the index is out of range.</returns>
211 | 		[Local]
212 | 		public string GetLocalName(int index)
213 | 		{
214 | 			bool outOfRange = index < 0 || index >= length;
215 | 			if (outOfRange)
216 | 			{
217 | 				return null;
218 | 			}
219 | 			return names[index].GetLocal(mode);
220 | 		}
221 | 
222 | 		// [NOCPP[
223 | 
224 | 		/// <summary>Qualified name of the attribute at <paramref name="index"/>.</summary>
225 | 		/// <returns>The qualified name for the current mode, or <c>null</c> if the index is out of range.</returns>
226 | 		public string GetQName(int index)
227 | 		{
228 | 			if (index < 0 || index >= length)
229 | 			{
230 | 				return null;
231 | 			}
232 | 
233 | 			return names[index].GetQName(mode);
234 | 		}
235 | 
236 | 		/// <summary>Type keyword for the attribute at <paramref name="index"/>.</summary>
237 | 		/// <returns>"ID" for <c>AttributeName.ID</c>, "CDATA" for any other name, or <c>null</c> if the index is out of range.</returns>
238 | 		public string GetType(int index)
239 | 		{
240 | 			if (index < 0 || index >= length)
241 | 			{
242 | 				return null;
243 | 			}
244 | 			bool isIdAttribute = names[index] == AttributeName.ID;
245 | 			return isIdAttribute ? "ID" : "CDATA";
246 | 		}
247 | 
248 | 		// ]NOCPP]
249 | 
250 | 		/// <summary>
251 | 		/// Name object of the attribute at <paramref name="index"/>.
252 | 		/// </summary>
253 | 		/// <returns>
254 | 		/// The stored <see cref="AttributeName"/>, or <c>null</c> if the index is out of range.
255 | 		/// </returns>
256 | 		public AttributeName GetAttributeName(int index)
257 | 		{
258 | 			bool inRange = 0 <= index && index < length;
259 | 			return inRange ? names[index] : null;
260 | 		}
261 | 
262 | 		/// <summary>
263 | 		/// Namespace URI of the attribute at <paramref name="index"/>.
264 | 		/// </summary>
265 | 		/// <returns>
266 | 		/// The URI for the current mode, or <c>null</c> if the index is out of range.
267 | 		/// </returns>
268 | 		[NsUri]
269 | 		public string GetURI(int index)
270 | 		{
271 | 			bool inRange = 0 <= index && index < length;
272 | 			return inRange ? names[index].GetUri(mode) : null;
273 | 		}
274 | 
275 | 		/// <summary>Prefix of the attribute at <paramref name="index"/>.</summary>
276 | 		/// <returns>The prefix for the current mode, or <c>null</c> if the index is out of range.</returns>
277 | 		[Prefix]
278 | 		public string GetPrefix(int index)
279 | 		{
280 | 			if (0 <= index && index < length)
281 | 			{
282 | 				return names[index].GetPrefix(mode);
283 | 			}
284 | 
285 | 			return null;
286 | 		}
287 | 
288 | 		/// <summary>Value of the attribute at <paramref name="index"/>.</summary>
289 | 		/// <returns>The stored value, or <c>null</c> if the index is out of range.</returns>
290 | 		public string GetValue(int index)
291 | 		{
292 | 			if (index >= length || index < 0)
293 | 			{
294 | 				return null;
295 | 			}
296 | 
297 | 			return values[index];
298 | 		}
299 | 
300 | 		/// <summary>
301 | 		/// Looks up an attribute value by name object.
302 | 		/// Only use with static argument: the lookup goes through
303 | 		/// <c>GetIndex(AttributeName)</c>, which compares with <c>==</c>.
304 | 		/// </summary>
305 | 		/// <returns>
306 | 		/// The stored value, or <c>null</c> when the name is not present.
307 | 		/// </returns>
308 | 		public string GetValue(AttributeName name)
309 | 		{
310 | 			int position = GetIndex(name);
311 | 			if (position == -1) { return null; }
312 | 
313 | 			return GetValue(position);
314 | 		}
315 | 
316 | 		// [NOCPP[
317 | 
318 | 		/// <summary>
319 | 		/// Value last added for <c>AttributeName.ID</c>, or <c>null</c> if none has been added since construction or the last <c>Clear</c>.
320 | 		/// </summary>
321 | 		public string Id
322 | 		{
323 | 			get { return this.idValue; }
324 | 		}
325 | 
326 | 		/// <summary>
327 | 		/// Number of entries currently held in the xmlns name/value arrays.
328 | 		/// </summary>
329 | 		public int XmlnsLength
330 | 		{
331 | 			get { return this.xmlnsLength; }
332 | 		}
333 | 
334 | 		/// <summary>Local name of the xmlns attribute at <paramref name="index"/>.</summary>
335 | 		/// <returns>The local name for the current mode, or <c>null</c> if the index is outside the xmlns list.</returns>
336 | 		[Local]
337 | 		public string GetXmlnsLocalName(int index)
338 | 		{
339 | 			if (index < 0 || index >= xmlnsLength)
340 | 			{
341 | 				return null;
342 | 			}
343 | 
344 | 			return xmlnsNames[index].GetLocal(mode);
345 | 		}
346 | 
347 | 		/// <summary>
348 | 		/// Namespace URI of the xmlns attribute at <paramref name="index"/>.
349 | 		/// </summary>
350 | 		/// <returns>
351 | 		/// The URI for the current mode, or <c>null</c> if the index is outside the xmlns list.
352 | 		/// </returns>
353 | 		[NsUri]
354 | 		public string GetXmlnsURI(int index)
355 | 		{
356 | 			bool inRange = 0 <= index && index < xmlnsLength;
357 | 			return inRange ? xmlnsNames[index].GetUri(mode) : null;
358 | 		}
359 | 
360 | 		/// <summary>
361 | 		/// Value of the xmlns attribute at <paramref name="index"/>.
362 | 		/// </summary>
363 | 		/// <returns>
364 | 		/// The stored value, or <c>null</c> if the index is outside the xmlns list.
365 | 		/// </returns>
366 | 		public string GetXmlnsValue(int index)
367 | 		{
368 | 			bool inRange = 0 <= index && index < xmlnsLength;
369 | 			return inRange ? xmlnsValues[index] : null;
370 | 		}
371 | 
372 | 		/// <summary>Finds <paramref name="name"/> in the xmlns list, comparing entries with <c>==</c>.</summary>
373 | 		/// <returns>The zero-based index, or -1 when there is no match.</returns>
374 | 		public int GetXmlnsIndex(AttributeName name)
375 | 		{
376 | 			int position = 0;
377 | 			while (position < xmlnsLength && !(xmlnsNames[position] == name))
378 | 			{
379 | 				position++;
380 | 			}
381 | 			return position < xmlnsLength ? position : -1;
382 | 		}
383 | 
384 | 		/// <summary>
385 | 		/// Looks up an xmlns attribute value by name object.
386 | 		/// </summary>
387 | 		/// <returns>
388 | 		/// The stored value, or <c>null</c> when the name is not in the xmlns list.
389 | 		/// </returns>
390 | 		public string GetXmlnsValue(AttributeName name)
391 | 		{
392 | 			int position = GetXmlnsIndex(name);
393 | 			if (position == -1) { return null; }
394 | 			return GetXmlnsValue(position);
395 | 		}
396 | 
397 | 		/// <summary>Name object of the xmlns attribute at <paramref name="index"/>.</summary>
398 | 		/// <returns>The stored <see cref="AttributeName"/>, or <c>null</c> if the index is outside the xmlns list.</returns>
399 | 		public AttributeName GetXmlnsAttributeName(int index)
400 | 		{
401 | 			if (index < 0 || index >= xmlnsLength)
402 | 			{
403 | 				return null;
404 | 			}
405 | 
406 | 			return xmlnsNames[index];
407 | 		}
408 | 
409 | 		// ]NOCPP]
410 | 
411 | 		/// <summary>
412 | 		/// Appends an attribute to this holder.
413 | 		/// </summary>
414 | 		/// <remarks>
415 | 		/// A value given for <c>AttributeName.ID</c> is also remembered as the id value.
416 | 		/// Names flagged <c>IsXmlns</c> are always recorded in the separate xmlns list;
417 | 		/// the policy then decides what follows: <c>Fatal</c> throws, <c>AlterInfoset</c>
418 | 		/// returns without touching the main list, and any other value (including
419 | 		/// <c>Allow</c>) goes on to add the attribute to the main list as well.
420 | 		/// </remarks>
421 | 		/// <exception cref="Exception">An xmlns attribute was added under the <c>Fatal</c> policy.</exception>
422 | 		internal void AddAttribute(AttributeName name, string value
423 | 			// [NOCPP[
424 | 				, XmlViolationPolicy xmlnsPolicy
425 | 			// ]NOCPP]
426 | 		)
427 | 		{
428 | 			// [NOCPP[
429 | 			if (name == AttributeName.ID)
430 | 			{
431 | 				idValue = value;
432 | 			}
433 | 
434 | 			if (name.IsXmlns)
435 | 			{
436 | 				if (xmlnsLength == xmlnsNames.Length)
437 | 				{
438 | 					// A list with no entries grows to 2 slots; after that the capacity doubles.
439 | 					int grownCapacity = (xmlnsLength == 0) ? 2 : xmlnsLength << 1;
440 | 					Array.Resize(ref xmlnsNames, grownCapacity);
441 | 					Array.Resize(ref xmlnsValues, grownCapacity);
442 | 				}
443 | 				xmlnsNames[xmlnsLength] = name;
444 | 				xmlnsValues[xmlnsLength] = value;
445 | 				xmlnsLength++;
446 | 				if (xmlnsPolicy == XmlViolationPolicy.Fatal)
447 | 				{
448 | 					// this is ugly (TODO)
449 | 					throw new Exception("Saw an xmlns attribute.");
450 | 				}
451 | 				if (xmlnsPolicy == XmlViolationPolicy.AlterInfoset)
452 | 				{
453 | 					return;
454 | 				}
455 | 			}
456 | 			// ]NOCPP]
457 | 
458 | 			if (length == names.Length)
459 | 			{
460 | 				// The first growth covers virtually 100% of elements according to Hixie.
461 | 				Array.Resize(ref names, length << 1);
462 | 				Array.Resize(ref values, length << 1);
463 | 			}
464 | 			names[length] = name;
465 | 			values[length] = value;
466 | 			length++;
467 | 		}
468 | 
469 | 		/// <summary>
470 | 		/// Empties both attribute lists, forgets the id value and switches to
471 | 		/// mode <paramref name="m"/>. The backing arrays themselves are kept.
472 | 		/// </summary>
473 | 		/// <param name="m">Mode used for name lookups from now on.</param>
474 | 		internal void Clear(int m)
475 | 		{
476 | 			// Only the slots that were in use need to be reset to null.
477 | 			Array.Clear(names, 0, length);
478 | 			Array.Clear(values, 0, length);
479 | 			length = 0;
480 | 			mode = m;
481 | 			// [NOCPP[
482 | 			idValue = null;
483 | 			Array.Clear(xmlnsNames, 0, xmlnsLength);
484 | 			Array.Clear(xmlnsValues, 0, xmlnsLength);
485 | 			xmlnsLength = 0;
486 | 			// ]NOCPP]
487 | 		}
488 | 
489 | 		/// <summary>
490 | 		/// This is only used for <code>AttributeName</code> ownership transfer
491 | 		/// in the isindex case to avoid freeing custom names twice in C++.
492 | 		/// In this port it empties the main list only; the id value and the
493 | 		/// xmlns list are left untouched.
494 | 		/// </summary>
495 | 		internal void ClearWithoutReleasingContents()
496 | 		{
497 | 			// Reset just the used slots, then mark the list as empty.
498 | 			Array.Clear(names, 0, length);
499 | 			Array.Clear(values, 0, length);
500 | 			length = 0;
501 | 		}
502 | 
503 | 		/// <summary>
504 | 		/// Tells whether an equivalent name is already present in either the
505 | 		/// main list or the xmlns list.
506 | 		/// </summary>
507 | 		/// <param name="name">Name to look for; compared with <c>EqualsAnother</c>.</param>
508 | 		public bool Contains(AttributeName name)
509 | 		{
510 | 			bool present = false;
511 | 			for (int i = 0; !present && i < length; i++)
512 | 			{
513 | 				present = name.EqualsAnother(names[i]);
514 | 			}
515 | 			// [NOCPP[
516 | 			for (int i = 0; !present && i < xmlnsLength; i++)
517 | 			{
518 | 				present = name.EqualsAnother(xmlnsNames[i]);
519 | 			}
520 | 			// ]NOCPP]
521 | 			return present;
522 | 		}
523 | 
524 | 		public void AdjustForMath()
525 | 		{
526 | 			this.mode = AttributeName.MATHML; // name lookups now use the MathML mode
527 | 		}
528 | 
529 | 		public void AdjustForSvg()
530 | 		{
531 | 			this.mode = AttributeName.SVG; // name lookups now use the SVG mode
532 | 		}
533 | 
534 | 		/// <summary>
535 | 		/// Builds a new holder in mode 0 by re-adding every entry with the
536 | 		/// <c>Allow</c> policy: main-list names are cloned, xmlns names are reused.
537 | 		/// </summary>
538 | 		public HtmlAttributes CloneAttributes()
539 | 		{
540 | 			Debug.Assert((length == 0 && xmlnsLength == 0) || mode == 0 || mode == 3);
541 | 			HtmlAttributes copy = new HtmlAttributes(0);
542 | 			for (int index = 0; index < length; index++)
543 | 			{
544 | 				AttributeName clonedName = names[index].CloneAttributeName();
545 | 				copy.AddAttribute(clonedName, values[index], XmlViolationPolicy.Allow);
546 | 			}
547 | 			// [NOCPP[
548 | 			for (int index = 0; index < xmlnsLength; index++)
549 | 			{
550 | 				copy.AddAttribute(xmlnsNames[index], xmlnsValues[index], XmlViolationPolicy.Allow);
551 | 			}
552 | 			// ]NOCPP]
553 | 			return copy; // XXX!!!
554 | 		}
555 | 
556 | 		/// <summary>
557 | 		/// Compares two holders by the local names (resolved in
558 | 		/// <c>AttributeName.HTML</c> mode) and values of their main-list entries.
559 | 		/// The xmlns list and the order of the entries are not considered.
560 | 		/// </summary>
561 | 		/// <param name="other">Holder to compare with; <c>null</c> never matches.</param>
562 | 		/// <returns><c>false</c> when the lengths differ, an own name is missing from <paramref name="other"/>, or a same-named entry there carries a different value; otherwise <c>true</c>.</returns>
563 | 		public bool Equals(HtmlAttributes other)
564 | 		{
565 | 			Debug.Assert(mode == 0 || mode == 3, "Trying to compare attributes in foreign content.");
566 | 			// A null argument yields false instead of a NullReferenceException.
567 | 			if (object.ReferenceEquals(other, null) || length != other.Length)
568 | 			{
569 | 				return false;
570 | 			}
571 | 			for (int i = 0; i < length; i++)
572 | 			{
573 | 				// Comparing just the local names is OK, since these attribute
574 | 				// holders are both supposed to belong to HTML formatting elements.
575 | 				/*[Local]*/
576 | 				string ownLocal = names[i].GetLocal(AttributeName.HTML);
577 | 				bool found = false;
578 | 				for (int j = 0; j < other.Length; j++)
579 | 				{
580 | 					if (ownLocal == other.names[j].GetLocal(AttributeName.HTML))
581 | 					{
582 | 						found = true;
583 | 						if (values[i] != other.values[j]) { return false; }
584 | 					}
585 | 				}
586 | 				if (!found) { return false; }
587 | 			}
588 | 			return true;
589 | 		}
590 | 
591 | 		// [NOCPP[
592 | 
593 | 		/// <summary>
594 | 		/// Applies <paramref name="namePolicy"/> to every main-list attribute whose
595 | 		/// name is not an NCName in the current mode: <c>Fatal</c> reports through
596 | 		/// <c>treeBuilder.Fatal</c>; <c>AlterInfoset</c> swaps in an escaped name and,
597 | 		/// like <c>Allow</c>, warns unless the name is <c>AttributeName.XML_LANG</c>.
598 | 		/// </summary>
599 | 		internal void ProcessNonNcNames<T>(TreeBuilder<T> treeBuilder, XmlViolationPolicy namePolicy) where T : class
600 | 		{
601 | 			for (int i = 0; i < length; i++)
602 | 			{
603 | 				AttributeName original = names[i];
604 | 				if (original.IsNcName(mode)) { continue; }
605 | 
606 | 				string local = original.GetLocal(mode);
607 | 				if (namePolicy == XmlViolationPolicy.Fatal)
608 | 				{
609 | 					treeBuilder.Fatal("Attribute \u201C" + local + "\u201D is not serializable as XML 1.0.");
610 | 					continue;
611 | 				}
612 | 				bool alter = namePolicy == XmlViolationPolicy.AlterInfoset;
613 | 				if (!alter && namePolicy != XmlViolationPolicy.Allow) { continue; }
614 | 				if (alter) { names[i] = AttributeName.Create(NCName.EscapeName(local)); }
615 | 				// The XML_LANG check uses the name as it was before any replacement.
616 | 				if (original != AttributeName.XML_LANG) { treeBuilder.Warn("Attribute \u201C" + local + "\u201D is not serializable as XML 1.0."); }
617 | 			}
618 | 		}
619 | 
620 | 		/// <summary>Adds (with the <c>Allow</c> policy) each main-list attribute of
621 | 		/// <paramref name="attributes"/> whose name this holder does not yet contain.</summary>
622 | 		public void Merge(HtmlAttributes attributes)
623 | 		{
624 | 			int count = attributes.Length;
625 | 			for (int i = 0; i < count; i++)
626 | 			{
627 | 				AttributeName incoming = attributes.GetAttributeName(i);
628 | 				if (Contains(incoming)) { continue; }
629 | 				AddAttribute(incoming, attributes.GetValue(i), XmlViolationPolicy.Allow);
630 | 			}
631 | 		}
632 | 
633 | 		// ]NOCPP]
634 | 	}
635 | }
636 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/ILocator.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2012 Patrick Reisert
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 5 |  * copy of this software and associated documentation files (the "Software"), 
 6 |  * to deal in the Software without restriction, including without limitation 
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 8 |  * and/or sell copies of the Software, and to permit persons to whom the 
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in 
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | namespace HtmlParserSharp.Core
24 | {
25 | 	/// <summary>
26 | 	/// Read-only access to the current line and column
27 | 	/// (corresponds to the SAX Locator interface).
28 | 	/// This is implemented by Locator and Tokenizer.
29 | 	/// </summary>
30 | 	public interface ILocator
31 | 	{
32 | 		int LineNumber { get; } // line part of the current position
33 | 		int ColumnNumber { get; } // column part of the current position
34 | 	}
35 | }
36 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/ITreeBuilderState.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2009-2010 Mozilla Foundation
 3 |  * Copyright (c) 2012 Patrick Reisert
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 6 |  * copy of this software and associated documentation files (the "Software"), 
 7 |  * to deal in the Software without restriction, including without limitation 
 8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 9 |  * and/or sell copies of the Software, and to permit persons to whom the 
10 |  * Software is furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in 
13 |  * all copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
21 |  * DEALINGS IN THE SOFTWARE.
22 |  */
23 | 
24 | namespace HtmlParserSharp.Core
25 | {
26 | 	/// <summary>
27 | 	/// Read-only view of the state of the HTML5 tree builder, so that it can be
28 | 	/// implemented both by the tree builder itself and by snapshots.
29 | 	/// </summary>
30 | 	public interface ITreeBuilderState<T> where T : class
31 | 	{
32 | 		/// <summary>
33 | 		/// Gets the stack.
34 | 		/// </summary>
35 | 		/// <value>The stack.</value>
36 | 		StackNode<T>[] Stack { get; }
37 | 
38 | 		/// <summary>
39 | 		/// Gets the list of active formatting elements.
40 | 		/// </summary>
41 | 		/// <value>The list of active formatting elements.</value>
42 | 		StackNode<T>[] ListOfActiveFormattingElements { get; }
43 | 
44 | 		/// <summary>
45 | 		/// Gets the form pointer.
46 | 		/// </summary>
47 | 		/// <value>The form pointer.</value>
48 | 		T FormPointer { get; }
49 | 
50 | 		/// <summary>
51 | 		/// Gets the head pointer.
52 | 		/// </summary>
53 | 		/// <value>The head pointer.</value>
54 | 		T HeadPointer { get; }
55 | 
56 | 		/// <summary>
57 | 		/// Gets the deep tree surrogate parent.
58 | 		/// </summary>
59 | 		/// <value>The deep tree surrogate parent.</value>
60 | 		T DeepTreeSurrogateParent { get; }
61 | 
62 | 		/// <summary>
63 | 		/// Gets the mode.
64 | 		/// </summary>
65 | 		/// <value>The mode.</value>
66 | 		InsertionMode Mode { get; }
67 | 
68 | 		/// <summary>
69 | 		/// Gets the original mode.
70 | 		/// </summary>
71 | 		/// <value>The original mode.</value>
72 | 		InsertionMode OriginalMode { get; }
73 | 
74 | 		/// <summary>
75 | 		/// Gets a value indicating whether the frameset is OK.
76 | 		/// </summary>
77 | 		/// <value>
78 | 		///   <c>true</c> if the frameset is OK; otherwise, <c>false</c>.
79 | 		/// </value>
80 | 		bool IsFramesetOk { get; }
81 | 
82 | 		/// <summary>
83 | 		/// Gets a value indicating whether we need to drop LF.
84 | 		/// </summary>
85 | 		/// <value>
86 | 		///   <c>true</c> if we need to drop LF; otherwise, <c>false</c>.
87 | 		/// </value>
88 | 		bool IsNeedToDropLF { get; }
89 | 
90 | 		/// <summary>
91 | 		/// Gets a value indicating whether this instance is in quirks mode.
92 | 		/// </summary>
93 | 		/// <value>
94 | 		///   <c>true</c> if this instance is in quirks mode; otherwise, <c>false</c>.
95 | 		/// </value>
96 | 		bool IsQuirks { get; }
97 | 	}
98 | }
99 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/InsertionMode.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2007 Henri Sivonen
 3 |  * Copyright (c) 2007-2011 Mozilla Foundation
 4 |  * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla 
 5 |  * Foundation, and Opera Software ASA.
 6 |  * Copyright (c) 2012 Patrick Reisert
 7 |  *
 8 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 9 |  * copy of this software and associated documentation files (the "Software"), 
10 |  * to deal in the Software without restriction, including without limitation 
11 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
12 |  * and/or sell copies of the Software, and to permit persons to whom the 
13 |  * Software is furnished to do so, subject to the following conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be included in 
16 |  * all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
19 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
20 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
21 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
22 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
24 |  * DEALINGS IN THE SOFTWARE.
25 |  */
26 | 
27 | namespace HtmlParserSharp.Core
28 | {
29 | 	public enum InsertionMode // type of ITreeBuilderState.Mode and OriginalMode; values are fixed explicitly
30 | 	{
31 | 		INITIAL = 0,
32 | 
33 | 		BEFORE_HTML = 1,
34 | 
35 | 		BEFORE_HEAD = 2,
36 | 
37 | 		IN_HEAD = 3,
38 | 
39 | 		IN_HEAD_NOSCRIPT = 4,
40 | 
41 | 		AFTER_HEAD = 5,
42 | 
43 | 		IN_BODY = 6,
44 | 
45 | 		IN_TABLE = 7,
46 | 
47 | 		IN_CAPTION = 8,
48 | 
49 | 		IN_COLUMN_GROUP = 9,
50 | 
51 | 		IN_TABLE_BODY = 10,
52 | 
53 | 		IN_ROW = 11,
54 | 
55 | 		IN_CELL = 12,
56 | 
57 | 		IN_SELECT = 13,
58 | 
59 | 		IN_SELECT_IN_TABLE = 14,
60 | 
61 | 		AFTER_BODY = 15,
62 | 
63 | 		IN_FRAMESET = 16,
64 | 
65 | 		AFTER_FRAMESET = 17,
66 | 
67 | 		AFTER_AFTER_BODY = 18,
68 | 
69 | 		AFTER_AFTER_FRAMESET = 19,
70 | 
71 | 		TEXT = 20,
72 | 		// NOTE(review): this name reads like a flag rather than a mode - confirm how TreeBuilder uses it.
73 | 		FRAMESET_OK = 21
74 | 	}
75 | }
76 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/Locator.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2007 Henri Sivonen
 3 |  * Copyright (c) 2011 Mozilla Foundation
 4 |  * Copyright (c) 2012 Patrick Reisert
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 7 |  * copy of this software and associated documentation files (the "Software"), 
 8 |  * to deal in the Software without restriction, including without limitation 
 9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
10 |  * and/or sell copies of the Software, and to permit persons to whom the 
11 |  * Software is furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in 
14 |  * all copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
22 |  * DEALINGS IN THE SOFTWARE.
23 |  */
24 | 
25 | namespace HtmlParserSharp.Core
26 | {
27 | 	/// <summary>Snapshot of another <see cref="ILocator"/>'s column and line, taken by the constructor, which rejects <c>null</c>.</summary>
28 | 	public class Locator : ILocator
29 | 	{
30 | 		public int ColumnNumber { get; private set; }
31 | 		public int LineNumber { get; private set; }
32 | 		public Locator(ILocator locator)
33 | 		{
34 | 			if (locator == null) { throw new System.ArgumentNullException("locator"); }
35 | 			ColumnNumber = locator.ColumnNumber;
36 | 			LineNumber = locator.LineNumber;
37 | 		}
38 | 	}
39 | }
40 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/NCName.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2008-2009 Mozilla Foundation
  3 |  * Copyright (c) 2012 Patrick Reisert
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  6 |  * copy of this software and associated documentation files (the "Software"), 
  7 |  * to deal in the Software without restriction, including without limitation 
  8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
  9 |  * and/or sell copies of the Software, and to permit persons to whom the 
 10 |  * Software is furnished to do so, subject to the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included in 
 13 |  * all copies or substantial portions of the Software.
 14 |  *
 15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 21 |  * DEALINGS IN THE SOFTWARE.
 22 |  */
 23 | 
 24 | using System;
 25 | using System.Text;
 26 | 
 27 | namespace HtmlParserSharp.Core
 28 | {
 29 | 	public sealed class NCName
 30 | 	{
 31 | 		// [NOCPP[
 32 | 		// Chosen so that (high << 10) + low + SURROGATE_OFFSET is the code point of a UTF-16 surrogate pair.
 33 | 		private const int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
 34 | 		// Upper-case hexadecimal digits, indexed by their value 0-15.
 35 | 		private static readonly char[] HEX_TABLE = "0123456789ABCDEF".ToCharArray();
 36 | 		/// <summary>Returns true when <paramref name="c"/> is '_' or lies in one of the ranges listed below. NOTE(review): the ranges look like the XML 1.0 Letter production - confirm.</summary>
 37 | 		public static bool IsNCNameStart(char c)
 38 | 		{
 39 | 			return ((c >= '\u0041' && c <= '\u005A')
 40 | 					|| (c >= '\u0061' && c <= '\u007A')
 41 | 					|| (c >= '\u00C0' && c <= '\u00D6')
 42 | 					|| (c >= '\u00D8' && c <= '\u00F6')
 43 | 					|| (c >= '\u00F8' && c <= '\u00FF')
 44 | 					|| (c >= '\u0100' && c <= '\u0131')
 45 | 					|| (c >= '\u0134' && c <= '\u013E')
 46 | 					|| (c >= '\u0141' && c <= '\u0148')
 47 | 					|| (c >= '\u014A' && c <= '\u017E')
 48 | 					|| (c >= '\u0180' && c <= '\u01C3')
 49 | 					|| (c >= '\u01CD' && c <= '\u01F0')
 50 | 					|| (c >= '\u01F4' && c <= '\u01F5')
 51 | 					|| (c >= '\u01FA' && c <= '\u0217')
 52 | 					|| (c >= '\u0250' && c <= '\u02A8')
 53 | 					|| (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386')
 54 | 					|| (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
 55 | 					|| (c >= '\u038E' && c <= '\u03A1')
 56 | 					|| (c >= '\u03A3' && c <= '\u03CE')
 57 | 					|| (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA')
 58 | 					|| (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0')
 59 | 					|| (c >= '\u03E2' && c <= '\u03F3')
 60 | 					|| (c >= '\u0401' && c <= '\u040C')
 61 | 					|| (c >= '\u040E' && c <= '\u044F')
 62 | 					|| (c >= '\u0451' && c <= '\u045C')
 63 | 					|| (c >= '\u045E' && c <= '\u0481')
 64 | 					|| (c >= '\u0490' && c <= '\u04C4')
 65 | 					|| (c >= '\u04C7' && c <= '\u04C8')
 66 | 					|| (c >= '\u04CB' && c <= '\u04CC')
 67 | 					|| (c >= '\u04D0' && c <= '\u04EB')
 68 | 					|| (c >= '\u04EE' && c <= '\u04F5')
 69 | 					|| (c >= '\u04F8' && c <= '\u04F9')
 70 | 					|| (c >= '\u0531' && c <= '\u0556') || (c == '\u0559')
 71 | 					|| (c >= '\u0561' && c <= '\u0586')
 72 | 					|| (c >= '\u05D0' && c <= '\u05EA')
 73 | 					|| (c >= '\u05F0' && c <= '\u05F2')
 74 | 					|| (c >= '\u0621' && c <= '\u063A')
 75 | 					|| (c >= '\u0641' && c <= '\u064A')
 76 | 					|| (c >= '\u0671' && c <= '\u06B7')
 77 | 					|| (c >= '\u06BA' && c <= '\u06BE')
 78 | 					|| (c >= '\u06C0' && c <= '\u06CE')
 79 | 					|| (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5')
 80 | 					|| (c >= '\u06E5' && c <= '\u06E6')
 81 | 					|| (c >= '\u0905' && c <= '\u0939') || (c == '\u093D')
 82 | 					|| (c >= '\u0958' && c <= '\u0961')
 83 | 					|| (c >= '\u0985' && c <= '\u098C')
 84 | 					|| (c >= '\u098F' && c <= '\u0990')
 85 | 					|| (c >= '\u0993' && c <= '\u09A8')
 86 | 					|| (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2')
 87 | 					|| (c >= '\u09B6' && c <= '\u09B9')
 88 | 					|| (c >= '\u09DC' && c <= '\u09DD')
 89 | 					|| (c >= '\u09DF' && c <= '\u09E1')
 90 | 					|| (c >= '\u09F0' && c <= '\u09F1')
 91 | 					|| (c >= '\u0A05' && c <= '\u0A0A')
 92 | 					|| (c >= '\u0A0F' && c <= '\u0A10')
 93 | 					|| (c >= '\u0A13' && c <= '\u0A28')
 94 | 					|| (c >= '\u0A2A' && c <= '\u0A30')
 95 | 					|| (c >= '\u0A32' && c <= '\u0A33')
 96 | 					|| (c >= '\u0A35' && c <= '\u0A36')
 97 | 					|| (c >= '\u0A38' && c <= '\u0A39')
 98 | 					|| (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E')
 99 | 					|| (c >= '\u0A72' && c <= '\u0A74')
100 | 					|| (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D')
101 | 					|| (c >= '\u0A8F' && c <= '\u0A91')
102 | 					|| (c >= '\u0A93' && c <= '\u0AA8')
103 | 					|| (c >= '\u0AAA' && c <= '\u0AB0')
104 | 					|| (c >= '\u0AB2' && c <= '\u0AB3')
105 | 					|| (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD')
106 | 					|| (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C')
107 | 					|| (c >= '\u0B0F' && c <= '\u0B10')
108 | 					|| (c >= '\u0B13' && c <= '\u0B28')
109 | 					|| (c >= '\u0B2A' && c <= '\u0B30')
110 | 					|| (c >= '\u0B32' && c <= '\u0B33')
111 | 					|| (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D')
112 | 					|| (c >= '\u0B5C' && c <= '\u0B5D')
113 | 					|| (c >= '\u0B5F' && c <= '\u0B61')
114 | 					|| (c >= '\u0B85' && c <= '\u0B8A')
115 | 					|| (c >= '\u0B8E' && c <= '\u0B90')
116 | 					|| (c >= '\u0B92' && c <= '\u0B95')
117 | 					|| (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C')
118 | 					|| (c >= '\u0B9E' && c <= '\u0B9F')
119 | 					|| (c >= '\u0BA3' && c <= '\u0BA4')
120 | 					|| (c >= '\u0BA8' && c <= '\u0BAA')
121 | 					|| (c >= '\u0BAE' && c <= '\u0BB5')
122 | 					|| (c >= '\u0BB7' && c <= '\u0BB9')
123 | 					|| (c >= '\u0C05' && c <= '\u0C0C')
124 | 					|| (c >= '\u0C0E' && c <= '\u0C10')
125 | 					|| (c >= '\u0C12' && c <= '\u0C28')
126 | 					|| (c >= '\u0C2A' && c <= '\u0C33')
127 | 					|| (c >= '\u0C35' && c <= '\u0C39')
128 | 					|| (c >= '\u0C60' && c <= '\u0C61')
129 | 					|| (c >= '\u0C85' && c <= '\u0C8C')
130 | 					|| (c >= '\u0C8E' && c <= '\u0C90')
131 | 					|| (c >= '\u0C92' && c <= '\u0CA8')
132 | 					|| (c >= '\u0CAA' && c <= '\u0CB3')
133 | 					|| (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE')
134 | 					|| (c >= '\u0CE0' && c <= '\u0CE1')
135 | 					|| (c >= '\u0D05' && c <= '\u0D0C')
136 | 					|| (c >= '\u0D0E' && c <= '\u0D10')
137 | 					|| (c >= '\u0D12' && c <= '\u0D28')
138 | 					|| (c >= '\u0D2A' && c <= '\u0D39')
139 | 					|| (c >= '\u0D60' && c <= '\u0D61')
140 | 					|| (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30')
141 | 					|| (c >= '\u0E32' && c <= '\u0E33')
142 | 					|| (c >= '\u0E40' && c <= '\u0E45')
143 | 					|| (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84')
144 | 					|| (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A')
145 | 					|| (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97')
146 | 					|| (c >= '\u0E99' && c <= '\u0E9F')
147 | 					|| (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5')
148 | 					|| (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB')
149 | 					|| (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0')
150 | 					|| (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD')
151 | 					|| (c >= '\u0EC0' && c <= '\u0EC4')
152 | 					|| (c >= '\u0F40' && c <= '\u0F47')
153 | 					|| (c >= '\u0F49' && c <= '\u0F69')
154 | 					|| (c >= '\u10A0' && c <= '\u10C5')
155 | 					|| (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100')
156 | 					|| (c >= '\u1102' && c <= '\u1103')
157 | 					|| (c >= '\u1105' && c <= '\u1107') || (c == '\u1109')
158 | 					|| (c >= '\u110B' && c <= '\u110C')
159 | 					|| (c >= '\u110E' && c <= '\u1112') || (c == '\u113C')
160 | 					|| (c == '\u113E') || (c == '\u1140') || (c == '\u114C')
161 | 					|| (c == '\u114E') || (c == '\u1150')
162 | 					|| (c >= '\u1154' && c <= '\u1155') || (c == '\u1159')
163 | 					|| (c >= '\u115F' && c <= '\u1161') || (c == '\u1163')
164 | 					|| (c == '\u1165') || (c == '\u1167') || (c == '\u1169')
165 | 					|| (c >= '\u116D' && c <= '\u116E')
166 | 					|| (c >= '\u1172' && c <= '\u1173') || (c == '\u1175')
167 | 					|| (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB')
168 | 					|| (c >= '\u11AE' && c <= '\u11AF')
169 | 					|| (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA')
170 | 					|| (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB')
171 | 					|| (c == '\u11F0') || (c == '\u11F9')
172 | 					|| (c >= '\u1E00' && c <= '\u1E9B')
173 | 					|| (c >= '\u1EA0' && c <= '\u1EF9')
174 | 					|| (c >= '\u1F00' && c <= '\u1F15')
175 | 					|| (c >= '\u1F18' && c <= '\u1F1D')
176 | 					|| (c >= '\u1F20' && c <= '\u1F45')
177 | 					|| (c >= '\u1F48' && c <= '\u1F4D')
178 | 					|| (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59')
179 | 					|| (c == '\u1F5B') || (c == '\u1F5D')
180 | 					|| (c >= '\u1F5F' && c <= '\u1F7D')
181 | 					|| (c >= '\u1F80' && c <= '\u1FB4')
182 | 					|| (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE')
183 | 					|| (c >= '\u1FC2' && c <= '\u1FC4')
184 | 					|| (c >= '\u1FC6' && c <= '\u1FCC')
185 | 					|| (c >= '\u1FD0' && c <= '\u1FD3')
186 | 					|| (c >= '\u1FD6' && c <= '\u1FDB')
187 | 					|| (c >= '\u1FE0' && c <= '\u1FEC')
188 | 					|| (c >= '\u1FF2' && c <= '\u1FF4')
189 | 					|| (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126')
190 | 					|| (c >= '\u212A' && c <= '\u212B') || (c == '\u212E')
191 | 					|| (c >= '\u2180' && c <= '\u2182')
192 | 					|| (c >= '\u3041' && c <= '\u3094')
193 | 					|| (c >= '\u30A1' && c <= '\u30FA')
194 | 					|| (c >= '\u3105' && c <= '\u312C')
195 | 					|| (c >= '\uAC00' && c <= '\uD7A3')
196 | 					|| (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007')
197 | 					|| (c >= '\u3021' && c <= '\u3029') || (c == '_'));
198 | 		}
199 | 
200 | 		public static bool IsNCNameTrail(char c) // True when c matches any range or single character listed below.
201 | 		{ // IsNCName and EscapeName apply this test to every character after the first one.
202 | 			return ((c >= '\u0030' && c <= '\u0039') // '0'-'9'; NOTE(review): the table appears to follow the XML 1.0 Appendix B classes - confirm
203 | 					|| (c >= '\u0660' && c <= '\u0669')
204 | 					|| (c >= '\u06F0' && c <= '\u06F9')
205 | 					|| (c >= '\u0966' && c <= '\u096F')
206 | 					|| (c >= '\u09E6' && c <= '\u09EF')
207 | 					|| (c >= '\u0A66' && c <= '\u0A6F')
208 | 					|| (c >= '\u0AE6' && c <= '\u0AEF')
209 | 					|| (c >= '\u0B66' && c <= '\u0B6F')
210 | 					|| (c >= '\u0BE7' && c <= '\u0BEF')
211 | 					|| (c >= '\u0C66' && c <= '\u0C6F')
212 | 					|| (c >= '\u0CE6' && c <= '\u0CEF')
213 | 					|| (c >= '\u0D66' && c <= '\u0D6F')
214 | 					|| (c >= '\u0E50' && c <= '\u0E59')
215 | 					|| (c >= '\u0ED0' && c <= '\u0ED9')
216 | 					|| (c >= '\u0F20' && c <= '\u0F29')
217 | 					|| (c >= '\u0041' && c <= '\u005A') // 'A'-'Z'
218 | 					|| (c >= '\u0061' && c <= '\u007A') // 'a'-'z'
219 | 					|| (c >= '\u00C0' && c <= '\u00D6')
220 | 					|| (c >= '\u00D8' && c <= '\u00F6')
221 | 					|| (c >= '\u00F8' && c <= '\u00FF')
222 | 					|| (c >= '\u0100' && c <= '\u0131')
223 | 					|| (c >= '\u0134' && c <= '\u013E')
224 | 					|| (c >= '\u0141' && c <= '\u0148')
225 | 					|| (c >= '\u014A' && c <= '\u017E')
226 | 					|| (c >= '\u0180' && c <= '\u01C3')
227 | 					|| (c >= '\u01CD' && c <= '\u01F0')
228 | 					|| (c >= '\u01F4' && c <= '\u01F5')
229 | 					|| (c >= '\u01FA' && c <= '\u0217')
230 | 					|| (c >= '\u0250' && c <= '\u02A8')
231 | 					|| (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386')
232 | 					|| (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
233 | 					|| (c >= '\u038E' && c <= '\u03A1')
234 | 					|| (c >= '\u03A3' && c <= '\u03CE')
235 | 					|| (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA')
236 | 					|| (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0')
237 | 					|| (c >= '\u03E2' && c <= '\u03F3')
238 | 					|| (c >= '\u0401' && c <= '\u040C')
239 | 					|| (c >= '\u040E' && c <= '\u044F')
240 | 					|| (c >= '\u0451' && c <= '\u045C')
241 | 					|| (c >= '\u045E' && c <= '\u0481')
242 | 					|| (c >= '\u0490' && c <= '\u04C4')
243 | 					|| (c >= '\u04C7' && c <= '\u04C8')
244 | 					|| (c >= '\u04CB' && c <= '\u04CC')
245 | 					|| (c >= '\u04D0' && c <= '\u04EB')
246 | 					|| (c >= '\u04EE' && c <= '\u04F5')
247 | 					|| (c >= '\u04F8' && c <= '\u04F9')
248 | 					|| (c >= '\u0531' && c <= '\u0556') || (c == '\u0559')
249 | 					|| (c >= '\u0561' && c <= '\u0586')
250 | 					|| (c >= '\u05D0' && c <= '\u05EA')
251 | 					|| (c >= '\u05F0' && c <= '\u05F2')
252 | 					|| (c >= '\u0621' && c <= '\u063A')
253 | 					|| (c >= '\u0641' && c <= '\u064A')
254 | 					|| (c >= '\u0671' && c <= '\u06B7')
255 | 					|| (c >= '\u06BA' && c <= '\u06BE')
256 | 					|| (c >= '\u06C0' && c <= '\u06CE')
257 | 					|| (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5')
258 | 					|| (c >= '\u06E5' && c <= '\u06E6')
259 | 					|| (c >= '\u0905' && c <= '\u0939') || (c == '\u093D')
260 | 					|| (c >= '\u0958' && c <= '\u0961')
261 | 					|| (c >= '\u0985' && c <= '\u098C')
262 | 					|| (c >= '\u098F' && c <= '\u0990')
263 | 					|| (c >= '\u0993' && c <= '\u09A8')
264 | 					|| (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2')
265 | 					|| (c >= '\u09B6' && c <= '\u09B9')
266 | 					|| (c >= '\u09DC' && c <= '\u09DD')
267 | 					|| (c >= '\u09DF' && c <= '\u09E1')
268 | 					|| (c >= '\u09F0' && c <= '\u09F1')
269 | 					|| (c >= '\u0A05' && c <= '\u0A0A')
270 | 					|| (c >= '\u0A0F' && c <= '\u0A10')
271 | 					|| (c >= '\u0A13' && c <= '\u0A28')
272 | 					|| (c >= '\u0A2A' && c <= '\u0A30')
273 | 					|| (c >= '\u0A32' && c <= '\u0A33')
274 | 					|| (c >= '\u0A35' && c <= '\u0A36')
275 | 					|| (c >= '\u0A38' && c <= '\u0A39')
276 | 					|| (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E')
277 | 					|| (c >= '\u0A72' && c <= '\u0A74')
278 | 					|| (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D')
279 | 					|| (c >= '\u0A8F' && c <= '\u0A91')
280 | 					|| (c >= '\u0A93' && c <= '\u0AA8')
281 | 					|| (c >= '\u0AAA' && c <= '\u0AB0')
282 | 					|| (c >= '\u0AB2' && c <= '\u0AB3')
283 | 					|| (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD')
284 | 					|| (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C')
285 | 					|| (c >= '\u0B0F' && c <= '\u0B10')
286 | 					|| (c >= '\u0B13' && c <= '\u0B28')
287 | 					|| (c >= '\u0B2A' && c <= '\u0B30')
288 | 					|| (c >= '\u0B32' && c <= '\u0B33')
289 | 					|| (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D')
290 | 					|| (c >= '\u0B5C' && c <= '\u0B5D')
291 | 					|| (c >= '\u0B5F' && c <= '\u0B61')
292 | 					|| (c >= '\u0B85' && c <= '\u0B8A')
293 | 					|| (c >= '\u0B8E' && c <= '\u0B90')
294 | 					|| (c >= '\u0B92' && c <= '\u0B95')
295 | 					|| (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C')
296 | 					|| (c >= '\u0B9E' && c <= '\u0B9F')
297 | 					|| (c >= '\u0BA3' && c <= '\u0BA4')
298 | 					|| (c >= '\u0BA8' && c <= '\u0BAA')
299 | 					|| (c >= '\u0BAE' && c <= '\u0BB5')
300 | 					|| (c >= '\u0BB7' && c <= '\u0BB9')
301 | 					|| (c >= '\u0C05' && c <= '\u0C0C')
302 | 					|| (c >= '\u0C0E' && c <= '\u0C10')
303 | 					|| (c >= '\u0C12' && c <= '\u0C28')
304 | 					|| (c >= '\u0C2A' && c <= '\u0C33')
305 | 					|| (c >= '\u0C35' && c <= '\u0C39')
306 | 					|| (c >= '\u0C60' && c <= '\u0C61')
307 | 					|| (c >= '\u0C85' && c <= '\u0C8C')
308 | 					|| (c >= '\u0C8E' && c <= '\u0C90')
309 | 					|| (c >= '\u0C92' && c <= '\u0CA8')
310 | 					|| (c >= '\u0CAA' && c <= '\u0CB3')
311 | 					|| (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE')
312 | 					|| (c >= '\u0CE0' && c <= '\u0CE1')
313 | 					|| (c >= '\u0D05' && c <= '\u0D0C')
314 | 					|| (c >= '\u0D0E' && c <= '\u0D10')
315 | 					|| (c >= '\u0D12' && c <= '\u0D28')
316 | 					|| (c >= '\u0D2A' && c <= '\u0D39')
317 | 					|| (c >= '\u0D60' && c <= '\u0D61')
318 | 					|| (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30')
319 | 					|| (c >= '\u0E32' && c <= '\u0E33')
320 | 					|| (c >= '\u0E40' && c <= '\u0E45')
321 | 					|| (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84')
322 | 					|| (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A')
323 | 					|| (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97')
324 | 					|| (c >= '\u0E99' && c <= '\u0E9F')
325 | 					|| (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5')
326 | 					|| (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB')
327 | 					|| (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0')
328 | 					|| (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD')
329 | 					|| (c >= '\u0EC0' && c <= '\u0EC4')
330 | 					|| (c >= '\u0F40' && c <= '\u0F47')
331 | 					|| (c >= '\u0F49' && c <= '\u0F69')
332 | 					|| (c >= '\u10A0' && c <= '\u10C5')
333 | 					|| (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100')
334 | 					|| (c >= '\u1102' && c <= '\u1103')
335 | 					|| (c >= '\u1105' && c <= '\u1107') || (c == '\u1109')
336 | 					|| (c >= '\u110B' && c <= '\u110C')
337 | 					|| (c >= '\u110E' && c <= '\u1112') || (c == '\u113C')
338 | 					|| (c == '\u113E') || (c == '\u1140') || (c == '\u114C')
339 | 					|| (c == '\u114E') || (c == '\u1150')
340 | 					|| (c >= '\u1154' && c <= '\u1155') || (c == '\u1159')
341 | 					|| (c >= '\u115F' && c <= '\u1161') || (c == '\u1163')
342 | 					|| (c == '\u1165') || (c == '\u1167') || (c == '\u1169')
343 | 					|| (c >= '\u116D' && c <= '\u116E')
344 | 					|| (c >= '\u1172' && c <= '\u1173') || (c == '\u1175')
345 | 					|| (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB')
346 | 					|| (c >= '\u11AE' && c <= '\u11AF')
347 | 					|| (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA')
348 | 					|| (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB')
349 | 					|| (c == '\u11F0') || (c == '\u11F9')
350 | 					|| (c >= '\u1E00' && c <= '\u1E9B')
351 | 					|| (c >= '\u1EA0' && c <= '\u1EF9')
352 | 					|| (c >= '\u1F00' && c <= '\u1F15')
353 | 					|| (c >= '\u1F18' && c <= '\u1F1D')
354 | 					|| (c >= '\u1F20' && c <= '\u1F45')
355 | 					|| (c >= '\u1F48' && c <= '\u1F4D')
356 | 					|| (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59')
357 | 					|| (c == '\u1F5B') || (c == '\u1F5D')
358 | 					|| (c >= '\u1F5F' && c <= '\u1F7D')
359 | 					|| (c >= '\u1F80' && c <= '\u1FB4')
360 | 					|| (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE')
361 | 					|| (c >= '\u1FC2' && c <= '\u1FC4')
362 | 					|| (c >= '\u1FC6' && c <= '\u1FCC')
363 | 					|| (c >= '\u1FD0' && c <= '\u1FD3')
364 | 					|| (c >= '\u1FD6' && c <= '\u1FDB')
365 | 					|| (c >= '\u1FE0' && c <= '\u1FEC')
366 | 					|| (c >= '\u1FF2' && c <= '\u1FF4')
367 | 					|| (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126')
368 | 					|| (c >= '\u212A' && c <= '\u212B') || (c == '\u212E')
369 | 					|| (c >= '\u2180' && c <= '\u2182')
370 | 					|| (c >= '\u3041' && c <= '\u3094')
371 | 					|| (c >= '\u30A1' && c <= '\u30FA')
372 | 					|| (c >= '\u3105' && c <= '\u312C')
373 | 					|| (c >= '\uAC00' && c <= '\uD7A3')
374 | 					|| (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007')
375 | 					|| (c >= '\u3021' && c <= '\u3029') || (c == '_') || (c == '.') // the literal punctuation '_' and '.' is accepted here
376 | 					|| (c == '-') || (c >= '\u0300' && c <= '\u0345') // as is '-'
377 | 					|| (c >= '\u0360' && c <= '\u0361')
378 | 					|| (c >= '\u0483' && c <= '\u0486')
379 | 					|| (c >= '\u0591' && c <= '\u05A1')
380 | 					|| (c >= '\u05A3' && c <= '\u05B9')
381 | 					|| (c >= '\u05BB' && c <= '\u05BD') || (c == '\u05BF')
382 | 					|| (c >= '\u05C1' && c <= '\u05C2') || (c == '\u05C4')
383 | 					|| (c >= '\u064B' && c <= '\u0652') || (c == '\u0670')
384 | 					|| (c >= '\u06D6' && c <= '\u06DC')
385 | 					|| (c >= '\u06DD' && c <= '\u06DF')
386 | 					|| (c >= '\u06E0' && c <= '\u06E4')
387 | 					|| (c >= '\u06E7' && c <= '\u06E8')
388 | 					|| (c >= '\u06EA' && c <= '\u06ED')
389 | 					|| (c >= '\u0901' && c <= '\u0903') || (c == '\u093C')
390 | 					|| (c >= '\u093E' && c <= '\u094C') || (c == '\u094D')
391 | 					|| (c >= '\u0951' && c <= '\u0954')
392 | 					|| (c >= '\u0962' && c <= '\u0963')
393 | 					|| (c >= '\u0981' && c <= '\u0983') || (c == '\u09BC')
394 | 					|| (c == '\u09BE') || (c == '\u09BF')
395 | 					|| (c >= '\u09C0' && c <= '\u09C4')
396 | 					|| (c >= '\u09C7' && c <= '\u09C8')
397 | 					|| (c >= '\u09CB' && c <= '\u09CD') || (c == '\u09D7')
398 | 					|| (c >= '\u09E2' && c <= '\u09E3') || (c == '\u0A02')
399 | 					|| (c == '\u0A3C') || (c == '\u0A3E') || (c == '\u0A3F')
400 | 					|| (c >= '\u0A40' && c <= '\u0A42')
401 | 					|| (c >= '\u0A47' && c <= '\u0A48')
402 | 					|| (c >= '\u0A4B' && c <= '\u0A4D')
403 | 					|| (c >= '\u0A70' && c <= '\u0A71')
404 | 					|| (c >= '\u0A81' && c <= '\u0A83') || (c == '\u0ABC')
405 | 					|| (c >= '\u0ABE' && c <= '\u0AC5')
406 | 					|| (c >= '\u0AC7' && c <= '\u0AC9')
407 | 					|| (c >= '\u0ACB' && c <= '\u0ACD')
408 | 					|| (c >= '\u0B01' && c <= '\u0B03') || (c == '\u0B3C')
409 | 					|| (c >= '\u0B3E' && c <= '\u0B43')
410 | 					|| (c >= '\u0B47' && c <= '\u0B48')
411 | 					|| (c >= '\u0B4B' && c <= '\u0B4D')
412 | 					|| (c >= '\u0B56' && c <= '\u0B57')
413 | 					|| (c >= '\u0B82' && c <= '\u0B83')
414 | 					|| (c >= '\u0BBE' && c <= '\u0BC2')
415 | 					|| (c >= '\u0BC6' && c <= '\u0BC8')
416 | 					|| (c >= '\u0BCA' && c <= '\u0BCD') || (c == '\u0BD7')
417 | 					|| (c >= '\u0C01' && c <= '\u0C03')
418 | 					|| (c >= '\u0C3E' && c <= '\u0C44')
419 | 					|| (c >= '\u0C46' && c <= '\u0C48')
420 | 					|| (c >= '\u0C4A' && c <= '\u0C4D')
421 | 					|| (c >= '\u0C55' && c <= '\u0C56')
422 | 					|| (c >= '\u0C82' && c <= '\u0C83')
423 | 					|| (c >= '\u0CBE' && c <= '\u0CC4')
424 | 					|| (c >= '\u0CC6' && c <= '\u0CC8')
425 | 					|| (c >= '\u0CCA' && c <= '\u0CCD')
426 | 					|| (c >= '\u0CD5' && c <= '\u0CD6')
427 | 					|| (c >= '\u0D02' && c <= '\u0D03')
428 | 					|| (c >= '\u0D3E' && c <= '\u0D43')
429 | 					|| (c >= '\u0D46' && c <= '\u0D48')
430 | 					|| (c >= '\u0D4A' && c <= '\u0D4D') || (c == '\u0D57')
431 | 					|| (c == '\u0E31') || (c >= '\u0E34' && c <= '\u0E3A')
432 | 					|| (c >= '\u0E47' && c <= '\u0E4E') || (c == '\u0EB1')
433 | 					|| (c >= '\u0EB4' && c <= '\u0EB9')
434 | 					|| (c >= '\u0EBB' && c <= '\u0EBC')
435 | 					|| (c >= '\u0EC8' && c <= '\u0ECD')
436 | 					|| (c >= '\u0F18' && c <= '\u0F19') || (c == '\u0F35')
437 | 					|| (c == '\u0F37') || (c == '\u0F39') || (c == '\u0F3E')
438 | 					|| (c == '\u0F3F') || (c >= '\u0F71' && c <= '\u0F84')
439 | 					|| (c >= '\u0F86' && c <= '\u0F8B')
440 | 					|| (c >= '\u0F90' && c <= '\u0F95') || (c == '\u0F97')
441 | 					|| (c >= '\u0F99' && c <= '\u0FAD')
442 | 					|| (c >= '\u0FB1' && c <= '\u0FB7') || (c == '\u0FB9')
443 | 					|| (c >= '\u20D0' && c <= '\u20DC') || (c == '\u20E1')
444 | 					|| (c >= '\u302A' && c <= '\u302F') || (c == '\u3099')
445 | 					|| (c == '\u309A') || (c == '\u00B7') || (c == '\u02D0')
446 | 					|| (c == '\u02D1') || (c == '\u0387') || (c == '\u0640')
447 | 					|| (c == '\u0E46') || (c == '\u0EC6') || (c == '\u3005')
448 | 					|| (c >= '\u3031' && c <= '\u3035')
449 | 					|| (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE'));
450 | 		}
451 | 
452 | 		/// <summary>
453 | 		/// Tests whether a string passes the NCName character checks: its first character must
454 | 		/// satisfy <see cref="IsNCNameStart"/> and every later character <see cref="IsNCNameTrail"/>.
455 | 		/// </summary>
456 | 		/// <param name="str">Candidate name; may be null.</param>
457 | 		/// <returns>
458 | 		/// False for null or empty input or when any character is rejected; otherwise true.
459 | 		/// </returns>
460 | 		public static bool IsNCName(string str)
461 | 		{
462 | 			// Null and empty strings are never valid names.
463 | 			if (string.IsNullOrEmpty(str))
464 | 			{
465 | 				return false;
466 | 			}
467 | 			// The leading character has its own rule.
468 | 			if (!IsNCNameStart(str[0]))
469 | 			{
470 | 				return false;
471 | 			}
472 | 			// Every remaining character only has to pass the trail rule.
473 | 			int position = 1;
474 | 			while (position < str.Length)
475 | 			{
476 | 				if (!IsNCNameTrail(str[position]))
477 | 				{
478 | 					return false;
479 | 				}
480 | 				position++;
481 | 			}
482 | 			return true;
483 | 		}
484 | 
485 | 		// Appends 'U' and then six HEX_TABLE entries for the low 24 bits of c, highest nibble first.
486 | 		private static void AppendUHexTo(StringBuilder sb, int c)
487 | 		{
488 | 			sb.Append('U');
489 | 			for (int shift = 20; shift >= 0; shift -= 4)
490 | 			{
491 | 				sb.Append(HEX_TABLE[(c >> shift) & 0xF]);
492 | 			}
493 | 		}
494 | 
495 | 		/// <summary>
496 | 		/// Returns an interned copy of <paramref name="str"/> in which each character rejected by the NCName
497 | 		/// checks, and each surrogate pair, is replaced by 'U' followed by six entries from HEX_TABLE.
498 | 		/// </summary>
499 | 		public static string EscapeName(string str)
500 | 		{
501 | 			StringBuilder sb = new StringBuilder();
502 | 			for (int i = 0; i < str.Length; i++)
503 | 			{
504 | 				char c = str[i];
505 | 				// Pair up only when a low surrogate really follows; a dangling high surrogate used to make str[++i] overrun.
506 | 				if ((c & 0xFC00) == 0xD800 && i + 1 < str.Length && (str[i + 1] & 0xFC00) == 0xDC00)
507 | 				{
508 | 					AppendUHexTo(sb, (c << 10) + str[++i] + SURROGATE_OFFSET);
509 | 				}
510 | 				else if (i == 0 ? !IsNCNameStart(c) : !IsNCNameTrail(c))
511 | 				{
512 | 					AppendUHexTo(sb, c);
513 | 				}
514 | 				else
515 | 				{
516 | 					sb.Append(c);
517 | 				}
518 | 			}
519 | 			return String.Intern(sb.ToString());
520 | 		}
521 | 		// ]NOCPP]
522 | 	}
523 | 
524 | }
525 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/NamedCharacterAccel.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera 
  3 |  * Software ASA.
  4 |  * 
  5 |  * You are granted a license to use, reproduce and create derivative works of 
  6 |  * this document.
  7 |  */
  8 | 
  9 | namespace HtmlParserSharp.Core
 10 | {
 11 | 	public sealed class NamedCharactersAccel // Holds a single static lookup table and declares no other members.
 12 | 	{
 13 | 		internal static readonly int[][] HILO_ACCEL = new int[][] { // Jagged table: an outer slot is either null or an int[] row. NOTE(review): rows look like 52 ints each - confirm.
 14 | 			null, // outer slots 0 through 64 are all null
 15 | 			null,
 16 | 			null,
 17 | 			null,
 18 | 			null,
 19 | 			null,
 20 | 			null,
 21 | 			null,
 22 | 			null,
 23 | 			null,
 24 | 			null,
 25 | 			null,
 26 | 			null,
 27 | 			null,
 28 | 			null,
 29 | 			null,
 30 | 			null,
 31 | 			null,
 32 | 			null,
 33 | 			null,
 34 | 			null,
 35 | 			null,
 36 | 			null,
 37 | 			null,
 38 | 			null,
 39 | 			null,
 40 | 			null,
 41 | 			null,
 42 | 			null,
 43 | 			null,
 44 | 			null,
 45 | 			null,
 46 | 			null,
 47 | 			null,
 48 | 			null,
 49 | 			null,
 50 | 			null,
 51 | 			null,
 52 | 			null,
 53 | 			null,
 54 | 			null,
 55 | 			null,
 56 | 			null,
 57 | 			null,
 58 | 			null,
 59 | 			null,
 60 | 			null,
 61 | 			null,
 62 | 			null,
 63 | 			null,
 64 | 			null,
 65 | 			null,
 66 | 			null,
 67 | 			null,
 68 | 			null,
 69 | 			null,
 70 | 			null,
 71 | 			null,
 72 | 			null,
 73 | 			null,
 74 | 			null,
 75 | 			null,
 76 | 			null,
 77 | 			null,
 78 | 			null,
 79 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 12386493, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // slot 65, the first populated row; NOTE(review): the outer index is presumably a character code (65 is 'A') - confirm with the code that reads this table
 80 | 					0, 0, 0, 0, 0, 40174181, 0, 0, 0, 0, 60162966, 0, 0, 0,
 81 | 					75367550, 0, 0, 0, 82183396, 0, 0, 0, 0, 0, 115148507, 0,
 82 | 					0, 135989275, 139397199, 0, 0, 0, 0, },
 83 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28770743, 0,
 84 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 85 | 					82248935, 0, 0, 0, 0, 0, 115214046, 0, 0, 0, 139528272, 0,
 86 | 					0, 0, 0, },
 87 | 			null,
 88 | 			new int[] { 0, 0, 0, 4980811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 89 | 					0, 38470219, 0, 0, 0, 0, 0, 0, 0, 0, 64553944, 0, 0, 0, 0,
 90 | 					0, 0, 0, 92145022, 0, 0, 0, 0, 0, 0, 0, 0, 139593810, 0, 0,
 91 | 					0, 0, },
 92 | 			new int[] { 65536, 0, 0, 0, 0, 0, 0, 0, 13172937, 0, 0, 0, 0, 0, 25297282, 0,
 93 | 					0, 28901816, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 94 | 					71500866, 0, 0, 0, 0, 82380008, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 95 | 					0, 0, 0, 0, 0, },
 96 | 			null,
 97 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 98 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 99 | 					94897574, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
100 | 			new int[] { 0, 0, 2555943, 0, 0, 0, 0, 0, 0, 0, 15532269, 0, 0, 0, 0, 0, 0,
101 | 					0, 31785444, 34406924, 0, 0, 0, 0, 0, 40895088, 0, 0, 0,
102 | 					60228503, 0, 0, 0, 0, 0, 0, 0, 82445546, 0, 0, 0, 0, 0,
103 | 					115279583, 0, 0, 136054812, 0, 0, 0, 0, 0, },
104 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105 | 					0, 0, 40239718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
106 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
107 | 			new int[] { 0, 0, 0, 5046349, 0, 0, 10944679, 0, 13238474, 0, 15597806,
108 | 					16056565, 0, 20578618, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
109 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
110 | 					0, 0, 0, 0, 0, 0, },
111 | 			null,
112 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
113 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
114 | 					95225257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
115 | 			new int[] { 196610, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
116 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
117 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
118 | 			new int[] { 0, 0, 0, 0, 8454273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
119 | 					0, 0, 0, 0, 0, 0, 0, 46072511, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
121 | 			new int[] { 0, 0, 2687016, 0, 0, 0, 0, 0, 13304011, 0, 0, 0, 0, 0, 0, 0, 0,
122 | 					0, 31850982, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
123 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
124 | 			null,
125 | 			null,
126 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
127 | 					34472462, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
128 | 					0, 0, 0, 95290798, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
129 | 			new int[] { 0, 0, 0, 5111886, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130 | 					34603535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
131 | 					0, 0, 0, 0, 105776718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
132 | 			new int[] { 0, 0, 0, 0, 8585346, 0, 11075752, 0, 0, 0, 0, 16187638, 0, 0, 0,
133 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
134 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
135 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28508594, 0, 0,
136 | 					0, 0, 0, 0, 0, 40305255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
137 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
138 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
139 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
140 | 					95421871, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
141 | 			null,
142 | 			null,
143 | 			null,
144 | 			new int[] { 0, 0, 0, 5177423, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
145 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
146 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
147 | 			null,
148 | 			null,
149 | 			null,
150 | 			null,
151 | 			null,
152 | 			null,
153 | 			new int[] { 327684, 1900571, 2949162, 5374032, 8716420, 0, 11206826,
154 | 					12517566, 13435084, 0, 15663343, 16515320, 19988785,
155 | 					20644155, 25428355, 27197855, 0, 29163962, 31916519,
156 | 					34734609, 36045347, 0, 0, 0, 40436328, 40960625, 41615994,
157 | 					46596800, 54264627, 60556184, 64750554, 68879387, 71763012,
158 | 					75826303, 77268122, 0, 81462490, 83952875, 92865919,
159 | 					96142769, 105973327, 110167691, 0, 116917984, 121833283,
160 | 					132253665, 136251421, 140707923, 0, 0, 144574620,
161 | 					145361066, },
162 | 			new int[] { 393222, 0, 0, 0, 0, 0, 11272364, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
163 | 					0, 0, 36176423, 38535756, 0, 0, 0, 0, 41681532, 46727880,
164 | 					0, 60687261, 0, 0, 71828552, 75891846, 0, 0, 0, 84411650,
165 | 					0, 96404924, 0, 0, 0, 117376761, 121898820, 132319203,
166 | 					136382496, 0, 0, 0, 0, 0, },
167 | 			new int[] { 589831, 1966110, 3276846, 5505107, 8978566, 10420383, 11468973,
168 | 					12583104, 13631694, 15139046, 15794416, 16711933, 20054322,
169 | 					20840764, 25624965, 27263392, 0, 29360574, 32244200,
170 | 					34931219, 36373033, 38601293, 39584348, 0, 40567402,
171 | 					41091698, 42205821, 46858954, 54723389, 60818335, 65143773,
172 | 					68944924, 71959625, 75957383, 77530268, 80938194, 81593564,
173 | 					84739337, 92997002, 96863680, 106235474, 110233234, 0,
174 | 					117704448, 122816325, 132515812, 136579106, 140773476,
175 | 					142149753, 143001732, 144705695, 145492139, },
176 | 			new int[] { 0, 0, 3342387, 0, 9044106, 0, 11534512, 0, 13697233, 0, 0, 0, 0,
177 | 					0, 25690504, 0, 0, 0, 0, 0, 36438572, 38732366, 0, 0, 0,
178 | 					41157236, 0, 46924492, 54788932, 61080481, 65209315, 0,
179 | 					72025163, 0, 0, 0, 0, 85132558, 93062540, 96929223,
180 | 					106563158, 0, 0, 118032133, 123012947, 132581351,
181 | 					136775717, 140839013, 0, 143067271, 0, 145557677, },
182 | 			new int[] { 0, 2162719, 3473460, 5636181, 0, 0, 0, 0, 0, 0, 0, 18809088,
183 | 					20185395, 21299519, 0, 0, 0, 29622721, 0, 0, 0, 39256656,
184 | 					39649885, 0, 0, 41288309, 42336901, 47448781, 55182149,
185 | 					61342629, 65274852, 69010461, 72811596, 76219528, 77726880,
186 | 					0, 0, 86967572, 93128077, 97650120, 106628699, 110560915,
187 | 					0, 118490890, 123733846, 132646888, 0, 141232230,
188 | 					142411898, 0, 144836769, 145688750, },
189 | 			new int[] { 655370, 2228258, 3538998, 5701719, 9109643, 10485920, 11600049,
190 | 					12648641, 13762770, 15204584, 15859954, 18874656, 20250933,
191 | 					21365062, 25756041, 27328929, 28574132, 29688261, 32309741,
192 | 					34996758, 36504109, 39322200, 39715422, 39912033, 40632940,
193 | 					41353847, 42467975, 47514325, 55247691, 61473705, 65405925,
194 | 					69272606, 72877144, 76285068, 77857955, 81003732, 81659102,
195 | 					87164208, 93193614, 97715667, 106759772, 110626456,
196 | 					114296528, 118687505, 123864929, 132712425, 136906792,
197 | 					141297772, 142477438, 143132808, 144902307, 145754288, },
198 | 			new int[] { 786443, 0, 0, 0, 9240716, 0, 11665586, 0, 13893843, 0, 0, 0, 0,
199 | 					0, 25887114, 0, 0, 0, 0, 0, 36635182, 0, 0, 0, 0, 0,
200 | 					42599049, 0, 0, 0, 65733607, 0, 73008217, 0, 77989029, 0,
201 | 					81724639, 87295283, 0, 98305492, 107021918, 0, 0, 0, 0, 0,
202 | 					137037866, 0, 0, 0, 0, 0, },
203 | 			new int[] { 0, 0, 3604535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27394466, 0,
204 | 					29753798, 32571886, 35258903, 0, 0, 0, 0, 0, 0, 0, 0,
205 | 					55509836, 61604779, 0, 0, 0, 0, 0, 0, 81790176, 87557429,
206 | 					93259151, 98502109, 107152994, 110888601, 0, 119015188,
207 | 					124323683, 133498858, 137234476, 0, 0, 143263881, 0,
208 | 					145819825, },
209 | 			new int[] { 0, 0, 3866680, 6160472, 0, 10616993, 0, 12714178, 0, 0, 0, 0,
210 | 					20316470, 0, 0, 27460003, 0, 31261127, 32637426, 35521051,
211 | 					0, 0, 0, 39977570, 0, 0, 0, 48366294, 56492880, 62391213,
212 | 					0, 69338146, 73073755, 0, 78316711, 0, 0, 0, 93980048,
213 | 					98764256, 107218532, 111085213, 114362065, 119736089,
214 | 					125241194, 133957622, 0, 0, 0, 143329419, 144967844,
215 | 					145885362, },
216 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
217 | 					0, 0, 0, 0, 0, 0, 0, 62456761, 0, 69403683, 73139292, 0,
218 | 					78382252, 0, 81855713, 87622969, 0, 98829796, 0, 0, 0, 0,
219 | 					0, 0, 0, 0, 0, 0, 0, 0, },
220 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 | 					0, 0, 0, 0, 0, 48431843, 0, 0, 0, 0, 0, 76416141, 0, 0, 0,
222 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
223 | 			new int[] { 851981, 0, 4063292, 0, 9306254, 0, 0, 0, 0, 0, 0, 19005729, 0, 0,
224 | 					0, 27525540, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42795659,
225 | 					49152740, 56623967, 62587834, 66061292, 69600292, 73401437,
226 | 					0, 0, 0, 0, 87950650, 94111131, 99878373, 107546213,
227 | 					112002720, 0, 119932708, 125306744, 0, 137496623,
228 | 					141363309, 0, 143460492, 0, 0, },
229 | 			new int[] { 917518, 0, 0, 0, 9502863, 0, 0, 0, 14155989, 0, 0, 19071267, 0,
230 | 					0, 26083724, 0, 0, 0, 32702963, 0, 36700720, 0, 0, 0, 0, 0,
231 | 					43057806, 0, 0, 0, 66520049, 0, 0, 0, 78841005, 81069269,
232 | 					0, 88147263, 0, 99943925, 107873898, 112068270, 0,
233 | 					120063783, 125831033, 0, 137693235, 0, 0, 143526030, 0, 0, },
234 | 			new int[] { 983055, 0, 0, 0, 0, 0, 0, 0, 14483673, 0, 0, 0, 0, 0, 0, 0, 0, 0,
235 | 					0, 0, 37093937, 0, 0, 0, 0, 0, 44565138, 49349359, 0, 0,
236 | 					66651128, 69665831, 73860193, 0, 79561908, 0, 0, 88606018,
237 | 					94176669, 0, 0, 0, 0, 120129321, 0, 0, 0, 141494382, 0,
238 | 					143591567, 0, 0, },
239 | 			new int[] { 1114128, 2293795, 4587583, 8257631, 9633938, 10813603, 11731123,
240 | 					12845251, 14680286, 15270121, 15925491, 19661092, 20382007,
241 | 					24969543, 26149263, 27656613, 28639669, 31392222, 32768500,
242 | 					35586591, 37225015, 39387737, 39780959, 40043107, 40698477,
243 | 					41419384, 44696233, 52495090, 57738081, 63439804, 66782202,
244 | 					69927976, 73925736, 76809359, 79824063, 81134806, 81921250,
245 | 					89785673, 94307742, 100795894, 107939439, 112330415,
246 | 					114427602, 120588074, 126158721, 134416381, 137824310,
247 | 					141559920, 142542975, 143853712, 145033381, 145950899, },
248 | 			new int[] { 1179666, 0, 0, 0, 9699476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26280336,
249 | 					0, 0, 0, 0, 0, 38076985, 0, 0, 0, 0, 0, 45220523, 52560674,
250 | 					0, 0, 67175420, 69993516, 0, 0, 79889603, 0, 0, 89916763,
251 | 					94373280, 101451267, 108136048, 0, 114493139, 120784689,
252 | 					126355334, 134481924, 138414136, 141625457, 142608512, 0,
253 | 					0, 0, },
254 | 			new int[] { 0, 0, 0, 0, 9896085, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
255 | 					33292789, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67830786, 0, 0,
256 | 					0, 80020676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127403913, 0, 0, 0,
257 | 					0, 0, 0, 0, },
258 | 			new int[] { 1310739, 2359332, 4653127, 0, 0, 0, 12189876, 0, 0, 0, 0, 0, 0,
259 | 					0, 26345874, 28246439, 0, 31457760, 0, 35652128, 38142534,
260 | 					0, 0, 0, 0, 0, 45351603, 52757283, 57869170, 63636425,
261 | 					67961868, 71304237, 73991273, 0, 0, 0, 0, 90309981, 0,
262 | 					101910029, 108988019, 114034355, 0, 120850228, 127469465,
263 | 					135464965, 138741825, 141690994, 142739585, 143984788, 0,
264 | 					0, },
265 | 			new int[] { 1441813, 2424869, 4718664, 8388735, 10027160, 10879142, 12255419,
266 | 					12976325, 14745825, 15401194, 15991028, 19857709, 20447544,
267 | 					25035134, 26542483, 28377520, 28705206, 31588833, 33358333,
268 | 					35783201, 38208071, 39453274, 39846496, 40108644, 40764014,
269 | 					41484921, 45613749, 53216038, 58196852, 63898572, 68158478,
270 | 					71369793, 74253418, 77005973, 80479430, 81265879, 81986787,
271 | 					90965347, 94504353, 103679508, 109250176, 114165453,
272 | 					114558676, 121243445, 127731610, 135727124, 138807366,
273 | 					142018675, 142805123, 144115862, 145098918, 146016436, },
274 | 			new int[] { 1572887, 0, 0, 0, 10092698, 0, 12320956, 0, 14811362, 0, 0,
275 | 					19923248, 0, 25166207, 26739094, 0, 0, 0, 33423870, 0,
276 | 					38273608, 0, 0, 0, 0, 0, 45744825, 0, 58262393, 64095184,
277 | 					68355089, 0, 75170926, 0, 80610509, 0, 0, 91817325, 0,
278 | 					104203823, 109512324, 0, 0, 121636667, 128059294, 0,
279 | 					139069511, 0, 0, 0, 0, 0, },
280 | 			new int[] { 1703961, 2490406, 4849737, 0, 10223771, 0, 0, 13107399, 15007971,
281 | 					15466732, 0, 0, 20513081, 25231745, 26870169, 0, 0,
282 | 					31654371, 34275839, 0, 38404681, 0, 0, 0, 40829551, 0,
283 | 					45875899, 53609261, 59900794, 64226259, 68551700, 0, 0, 0,
284 | 					80807119, 81331417, 0, 91948410, 94700963, 104465975,
285 | 					109643400, 114230991, 114951893, 121702209, 131663779, 0,
286 | 					139266123, 0, 0, 144246936, 145295527, 0, },
287 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27132315, 0, 0, 0, 0,
288 | 					0, 0, 39518811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75302012, 0,
289 | 					0, 0, 0, 92079484, 0, 105383483, 109708938, 0, 0, 0, 0, 0,
290 | 					0, 0, 0, 144312474, 0, 0, },
291 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
292 | 					0, 0, 0, 0, 46006973, 0, 60031891, 64291797, 0, 0, 0, 0, 0,
293 | 					0, 0, 0, 0, 105711177, 0, 0, 0, 0, 131991514, 135923736,
294 | 					139331662, 0, 0, 144378011, 0, 146147509, },
295 | 			new int[] { 0, 0, 0, 0, 10354845, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
296 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68813847, 0, 0, 0, 0, 0,
297 | 					0, 0, 0, 0, 0, 0, 0, 121767746, 0, 0, 0, 0, 0, 0, 0, 0, },
298 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 | 					0, 0, 0, 0, 0, 0, 60097429, 0, 0, 0, 0, 77137048, 0, 0, 0,
300 | 					0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
301 | 			new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
302 | 					0, 0, 0, 0, 0, 0, 0, 64422870, 0, 0, 0, 0, 0, 0, 0, 0, 0,
303 | 					0, 0, 0, 0, 0, 132122591, 0, 0, 142084216, 0, 0, 0, 0, }, };
304 | 
305 | 	}
306 | 
307 | }
308 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/Portability.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2008-2009 Mozilla Foundation
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  5 |  * copy of this software and associated documentation files (the "Software"), 
  6 |  * to deal in the Software without restriction, including without limitation 
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
  8 |  * and/or sell copies of the Software, and to permit persons to whom the 
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in 
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | using System;
 24 | using HtmlParserSharp.Common;
 25 | 
 26 | namespace HtmlParserSharp.Core
 27 | {
	/// <summary>
	/// Static helpers for creating interned local names and for comparing strings
	/// against character buffers or ASCII case-insensitively. The class exists for
	/// parity with the C++ port.
	/// TODO: Remove this
	/// </summary>
	public sealed class Portability
	{
		// Allocating methods

		/// <summary>
		/// Creates a local name from a slice of a character buffer and returns the
		/// interned instance of that string. In C++, the refcount must be set up in such
		/// a way that calling <code>releaseLocal</code> on the return value balances the
		/// refcount set by this method.
		/// </summary>
		/// <param name="buf">The buffer that holds the characters of the name.</param>
		/// <param name="offset">Index in <paramref name="buf"/> of the first character.</param>
		/// <param name="length">The number of characters to take.</param>
		/// <returns>The interned string made of the requested characters.</returns>
		[Local]
		public static string NewLocalNameFromBuffer(char[] buf, int offset, int length)
		{
			string name = new string(buf, offset, length);
			return string.Intern(name);
		}

		// Comparison methods

		/// <summary>
		/// Tells whether a local name consists of exactly the characters found in a
		/// slice of a character buffer.
		/// </summary>
		/// <param name="local">The local name to compare.</param>
		/// <param name="buf">The buffer that holds the candidate characters.</param>
		/// <param name="offset">Index in <paramref name="buf"/> of the first candidate character.</param>
		/// <param name="length">The number of candidate characters.</param>
		/// <returns>
		///   <c>true</c> if the lengths agree and every character matches; otherwise, <c>false</c>.
		/// </returns>
		public static bool LocalEqualsBuffer([Local] string local, char[] buf, int offset, int length)
		{
			if (local.Length != length)
			{
				return false;
			}
			int index = 0;
			// Advance while the characters agree; stopping early means a mismatch.
			while (index < length && local[index] == buf[offset + index])
			{
				index++;
			}
			return index == length;
		}

		/// <summary>
		/// Tells whether <paramref name="str"/> starts with <paramref name="lowerCaseLiteral"/>
		/// when the ASCII letters A-Z in <paramref name="str"/> are treated as lower case.
		/// </summary>
		/// <param name="lowerCaseLiteral">The prefix to look for; it is compared as-is.</param>
		/// <param name="str">The string to inspect; may be <c>null</c>.</param>
		/// <returns>
		///   <c>true</c> on a match; <c>false</c> otherwise, including when
		///   <paramref name="str"/> is <c>null</c> or shorter than the literal.
		/// </returns>
		public static bool LowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(string lowerCaseLiteral,	string str)
		{
			if (str == null || lowerCaseLiteral.Length > str.Length)
			{
				return false;
			}
			return LeadingCharsMatchIgnoreAsciiCase(lowerCaseLiteral, str);
		}

		/// <summary>
		/// Tells whether <paramref name="str"/> equals <paramref name="lowerCaseLiteral"/>
		/// when the ASCII letters A-Z in <paramref name="str"/> are treated as lower case.
		/// </summary>
		/// <param name="lowerCaseLiteral">The literal to compare against; it is compared as-is.</param>
		/// <param name="str">The string to inspect; may be <c>null</c>.</param>
		/// <returns>
		///   <c>true</c> on a match; <c>false</c> otherwise, including when
		///   <paramref name="str"/> is <c>null</c> or has a different length.
		/// </returns>
		public static bool LowerCaseLiteralEqualsIgnoreAsciiCaseString(string lowerCaseLiteral, string str)
		{
			if (str == null || lowerCaseLiteral.Length != str.Length)
			{
				return false;
			}
			return LeadingCharsMatchIgnoreAsciiCase(lowerCaseLiteral, str);
		}

		/// <summary>
		/// Compares the first <c>lowerCaseLiteral.Length</c> characters of
		/// <paramref name="str"/> with the literal, folding only A-Z in
		/// <paramref name="str"/> to lower case. The caller guarantees that
		/// <paramref name="str"/> is at least as long as the literal.
		/// </summary>
		private static bool LeadingCharsMatchIgnoreAsciiCase(string lowerCaseLiteral, string str)
		{
			for (int i = 0; i < lowerCaseLiteral.Length; i++)
			{
				char candidate = str[i];
				if (candidate >= 'A' && candidate <= 'Z')
				{
					// 0x20 is the distance between an upper-case ASCII letter and its lower-case form.
					candidate = (char)(candidate + 0x20);
				}
				if (candidate != lowerCaseLiteral[i])
				{
					return false;
				}
			}
			return true;
		}
	}
116 | }
117 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/StackNode.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2007 Henri Sivonen
  3 |  * Copyright (c) 2007-2011 Mozilla Foundation
  4 |  * Copyright (c) 2012 Patrick Reisert
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  7 |  * copy of this software and associated documentation files (the "Software"), 
  8 |  * to deal in the Software without restriction, including without limitation 
  9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 10 |  * and/or sell copies of the Software, and to permit persons to whom the 
 11 |  * Software is furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in 
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 22 |  * DEALINGS IN THE SOFTWARE.
 23 |  */
 24 | 
 25 | using System;
 26 | using System.Diagnostics;
 27 | using HtmlParserSharp.Common;
 28 | 
 29 | namespace HtmlParserSharp.Core
 30 | {
	/// <summary>
	/// An entry of the tree builder stack or of the list of active formatting
	/// elements. It couples a tree node of type <typeparamref name="T"/> with the
	/// element's flags, names and namespace URI.
	/// </summary>
	/// <typeparam name="T">The node type of the tree being built.</typeparam>
	public sealed class StackNode<T>
	{
		private const string XhtmlNamespace = "http://www.w3.org/1999/xhtml";

		private const string SvgNamespace = "http://www.w3.org/2000/svg";

		private const string MathMlNamespace = "http://www.w3.org/1998/Math/MathML";

		private readonly int nodeFlags;

		[Local]
		internal readonly string name;

		[Local]
		internal readonly string popName;

		[NsUri]
		internal readonly string ns;

		internal readonly T node;

		// Only used on the list of formatting elements
		internal HtmlAttributes attributes;

		// Every node starts out with a single reference.
		private int referenceCount = 1;

		// [NOCPP[

		private readonly TaintableLocator sourceLocator;

		/// <summary>
		/// Gets the locator that was handed to the constructor.
		/// </summary>
		public TaintableLocator Locator
		{
			get { return sourceLocator; }
		}

		// ]NOCPP]

		/// <summary>
		/// Gets the raw flag bits of this node.
		/// </summary>
		public int Flags
		{
			get { return nodeFlags; }
		}

		/// <summary>
		/// Gets the dispatch group, i.e. the flag bits selected by
		/// <c>ElementName.GROUP_MASK</c>.
		/// </summary>
		public DispatchGroup Group
		{
			get { return (DispatchGroup)(nodeFlags & ElementName.GROUP_MASK); }
		}

		/// <summary>
		/// Gets a value indicating whether the <c>SCOPING</c> flag is set.
		/// </summary>
		public bool IsScoping
		{
			get { return HasAny(ElementName.SCOPING); }
		}

		/// <summary>
		/// Gets a value indicating whether the <c>SPECIAL</c> flag is set.
		/// </summary>
		public bool IsSpecial
		{
			get { return HasAny(ElementName.SPECIAL); }
		}

		/// <summary>
		/// Gets a value indicating whether the <c>FOSTER_PARENTING</c> flag is set.
		/// </summary>
		public bool IsFosterParenting
		{
			get { return HasAny(ElementName.FOSTER_PARENTING); }
		}

		/// <summary>
		/// Gets a value indicating whether the <c>HTML_INTEGRATION_POINT</c> flag is set.
		/// </summary>
		public bool IsHtmlIntegrationPoint
		{
			get { return HasAny(ElementName.HTML_INTEGRATION_POINT); }
		}

		// [NOCPP[

		/// <summary>
		/// Gets a value indicating whether the <c>OPTIONAL_END_TAG</c> flag is set.
		/// </summary>
		public bool IsOptionalEndTag
		{
			get { return HasAny(ElementName.OPTIONAL_END_TAG); }
		}

		// ]NOCPP]

		/// <summary>
		/// Constructor for copying. This doesn't take another <code>StackNode</code>
		/// because in C++ the caller is responsible for reobtaining the local names
		/// from another interner. All other constructors delegate to this one.
		/// </summary>
		internal StackNode(int flags, [NsUri] string ns, [Local] string name, T node,
				[Local] string popName, HtmlAttributes attributes
			// [NOCPP[
				, TaintableLocator locator
			// ]NOCPP]
		)
		{
			nodeFlags = flags;
			this.ns = ns;
			this.name = name;
			this.popName = popName;
			this.node = node;
			this.attributes = attributes;
			// [NOCPP[
			sourceLocator = locator;
			// ]NOCPP]
		}

		/// <summary>
		/// Short hand for well-known HTML elements: the pop name equals the element
		/// name and no attributes are kept.
		/// </summary>
		internal StackNode(ElementName elementName, T node
			// [NOCPP[
				, TaintableLocator locator
			// ]NOCPP]
		)
			: this(elementName.Flags, XhtmlNamespace, elementName.name, node,
					elementName.name, null, locator)
		{
			Debug.Assert(!elementName.IsCustom, "Don't use this constructor for custom elements.");
		}

		/// <summary>
		/// Constructor for HTML formatting elements; keeps the given attributes.
		/// </summary>
		internal StackNode(ElementName elementName, T node, HtmlAttributes attributes
			// [NOCPP[
				, TaintableLocator locator
			// ]NOCPP]
		)
			: this(elementName.Flags, XhtmlNamespace, elementName.name, node,
					elementName.name, attributes, locator)
		{
			Debug.Assert(!elementName.IsCustom, "Don't use this constructor for custom elements.");
		}

		/// <summary>
		/// The common-case HTML constructor, with an explicit pop name.
		/// </summary>
		internal StackNode(ElementName elementName, T node, [Local] string popName
			// [NOCPP[
				, TaintableLocator locator
			// ]NOCPP]
		)
			: this(elementName.Flags, XhtmlNamespace, elementName.name, node,
					popName, null, locator)
		{
		}

		/// <summary>
		/// Constructor for SVG elements. Note that the order of the arguments is
		/// what distinguishes this from the HTML constructor. This is ugly, but
		/// AFAICT the least disruptive way to make this work with Java's generics
		/// and without unnecessary branches. :-(
		/// </summary>
		internal StackNode(ElementName elementName, [Local] string popName, T node
			// [NOCPP[
				, TaintableLocator locator
			// ]NOCPP]
		)
			: this(PrepareSvgFlags(elementName.Flags), SvgNamespace, elementName.name, node,
					popName, null, locator)
		{
		}

		/// <summary>
		/// Constructor for MathML.
		/// </summary>
		internal StackNode(ElementName elementName, T node, [Local] string popName,
				bool markAsIntegrationPoint
			// [NOCPP[
				, TaintableLocator locator
			// ]NOCPP]
		)
			: this(PrepareMathFlags(elementName.Flags, markAsIntegrationPoint), MathMlNamespace,
					elementName.name, node, popName, null, locator)
		{
		}

		/// <summary>
		/// Tells whether at least one of the bits in <paramref name="mask"/> is set
		/// in this node's flags.
		/// </summary>
		private bool HasAny(int mask)
		{
			return (nodeFlags & mask) != 0;
		}

		/// <summary>
		/// Derives the flags of an SVG node: the foster-parenting, scoping, special and
		/// optional-end-tag bits are cleared, then an element flagged
		/// <c>SCOPING_AS_SVG</c> gets scoping, special and HTML-integration-point set.
		/// </summary>
		private static int PrepareSvgFlags(int flags)
		{
			int result = flags & ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING
					| ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG);
			if ((result & ElementName.SCOPING_AS_SVG) == 0)
			{
				return result;
			}
			return result | ElementName.SCOPING | ElementName.SPECIAL | ElementName.HTML_INTEGRATION_POINT;
		}

		/// <summary>
		/// Derives the flags of a MathML node: the foster-parenting, scoping, special and
		/// optional-end-tag bits are cleared, then an element flagged
		/// <c>SCOPING_AS_MATHML</c> gets scoping and special set, and the
		/// HTML-integration-point bit is added when requested.
		/// </summary>
		private static int PrepareMathFlags(int flags, bool markAsIntegrationPoint)
		{
			int result = flags & ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING
					| ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG);
			if ((result & ElementName.SCOPING_AS_MATHML) != 0)
			{
				result |= ElementName.SCOPING | ElementName.SPECIAL;
			}
			if (markAsIntegrationPoint)
			{
				result |= ElementName.HTML_INTEGRATION_POINT;
			}
			return result;
		}

		/// <summary>
		/// Discards the attributes held by this node.
		/// </summary>
		public void DropAttributes()
		{
			attributes = null;
		}

		// [NOCPP[

		/// <summary>
		/// Returns the element name of this node.
		/// </summary>
		/// <returns>The value of the <c>name</c> field.</returns>
		public override string ToString()
		{
			return name;
		}

		// ]NOCPP]

		// TODO: probably we won't need these

		/// <summary>
		/// Increments the reference count.
		/// </summary>
		public void Retain()
		{
			referenceCount += 1;
		}

		/// <summary>
		/// Decrements the reference count. Nothing else happens here: the deletion
		/// step that ran when the count reached zero is disabled in this port.
		/// </summary>
		public void Release()
		{
			referenceCount -= 1;
		}
	}
320 | }
321 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/StateSnapshot.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2009-2010 Mozilla Foundation
  3 |  * Copyright (c) 2012 Patrick Reisert
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  6 |  * copy of this software and associated documentation files (the "Software"), 
  7 |  * to deal in the Software without restriction, including without limitation 
  8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
  9 |  * and/or sell copies of the Software, and to permit persons to whom the 
 10 |  * Software is furnished to do so, subject to the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included in 
 13 |  * all copies or substantial portions of the Software.
 14 |  *
 15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 21 |  * DEALINGS IN THE SOFTWARE.
 22 |  */
 23 | 
 24 | namespace HtmlParserSharp.Core
 25 | {
	/// <summary>
	/// Holds a set of tree builder state values. Every value is supplied to the
	/// constructor and can only be read afterwards.
	/// </summary>
	/// <typeparam name="T">The node type of the tree being built.</typeparam>
	public class StateSnapshot<T> : ITreeBuilderState<T> where T : class
	{
		private readonly StackNode<T>[] stack;
		private readonly StackNode<T>[] listOfActiveFormattingElements;
		private readonly T formPointer;
		private readonly T headPointer;
		private readonly T deepTreeSurrogateParent;
		private readonly InsertionMode mode;
		private readonly InsertionMode originalMode;
		private readonly bool framesetOk;
		private readonly bool needToDropLF;
		private readonly bool quirks;

		/// <summary>
		/// Stores the given state values; no copies are made of the arrays.
		/// </summary>
		internal StateSnapshot(StackNode<T>[] stack,
				StackNode<T>[] listOfActiveFormattingElements,
				T formPointer,
				T headPointer,
				T deepTreeSurrogateParent,
				InsertionMode mode,
				InsertionMode originalMode,
				bool framesetOk,
				bool needToDropLF,
				bool quirks)
		{
			this.stack = stack;
			this.listOfActiveFormattingElements = listOfActiveFormattingElements;
			this.formPointer = formPointer;
			this.headPointer = headPointer;
			this.deepTreeSurrogateParent = deepTreeSurrogateParent;
			this.mode = mode;
			this.originalMode = originalMode;
			this.framesetOk = framesetOk;
			this.needToDropLF = needToDropLF;
			this.quirks = quirks;
		}

		/// <summary>
		/// Gets the stack.
		/// </summary>
		public StackNode<T>[] Stack
		{
			get { return stack; }
		}

		/// <summary>
		/// Gets the list of active formatting elements.
		/// </summary>
		public StackNode<T>[] ListOfActiveFormattingElements
		{
			get { return listOfActiveFormattingElements; }
		}

		/// <summary>
		/// Gets the form pointer.
		/// </summary>
		public T FormPointer
		{
			get { return formPointer; }
		}

		/// <summary>
		/// Gets the head pointer.
		/// </summary>
		public T HeadPointer
		{
			get { return headPointer; }
		}

		/// <summary>
		/// Gets the deep tree surrogate parent.
		/// </summary>
		public T DeepTreeSurrogateParent
		{
			get { return deepTreeSurrogateParent; }
		}

		/// <summary>
		/// Gets the mode.
		/// </summary>
		public InsertionMode Mode
		{
			get { return mode; }
		}

		/// <summary>
		/// Gets the original mode.
		/// </summary>
		public InsertionMode OriginalMode
		{
			get { return originalMode; }
		}

		/// <summary>
		/// Gets a value indicating whether the frameset-ok flag is set.
		/// </summary>
		/// <value>
		///   <c>true</c> if the frameset-ok flag is set; otherwise, <c>false</c>.
		/// </value>
		public bool IsFramesetOk
		{
			get { return framesetOk; }
		}

		/// <summary>
		/// Gets a value indicating whether the need-to-drop-LF flag is set.
		/// </summary>
		/// <value>
		///   <c>true</c> if the need-to-drop-LF flag is set; otherwise, <c>false</c>.
		/// </value>
		public bool IsNeedToDropLF
		{
			get { return needToDropLF; }
		}

		/// <summary>
		/// Gets a value indicating whether the quirks flag is set.
		/// </summary>
		/// <value>
		///   <c>true</c> if the quirks flag is set; otherwise, <c>false</c>.
		/// </value>
		public bool IsQuirks
		{
			get { return quirks; }
		}
	}
101 | 
102 | }
103 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/TaintableLocator.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2011 Mozilla Foundation
 3 |  * Copyright (c) 2012 Patrick Reisert
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 6 |  * copy of this software and associated documentation files (the "Software"), 
 7 |  * to deal in the Software without restriction, including without limitation 
 8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 9 |  * and/or sell copies of the Software, and to permit persons to whom the 
10 |  * Software is furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in 
13 |  * all copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
21 |  * DEALINGS IN THE SOFTWARE.
22 |  */
23 | 
24 | namespace HtmlParserSharp.Core
25 | {
26 | 	public class TaintableLocator : Locator
27 | 	{
28 | 		public TaintableLocator(ILocator locator)
29 | 			: base(locator)
30 | 		{
31 | 			IsTainted = false;
32 | 		}
33 | 
34 | 		public void MarkTainted()
35 | 		{
36 | 			IsTainted = true;
37 | 		}
38 | 
39 | 		public bool IsTainted { get; private set; }
40 | 	}
41 | }
42 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/TreeBuilderConstants.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * The comments following this one that use the same comment syntax as this 
 3 |  * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007 
 4 |  * amended as of June 28 2007.
 5 |  * That document came with this statement:
 * "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and 
 7 |  * Opera Software ASA. You are granted a license to use, reproduce and 
 8 |  * create derivative works of this document."
 9 |  */
10 | 
11 | using HtmlParserSharp.Common;
12 | 
13 | namespace HtmlParserSharp.Core
14 | {
15 | 	/// <summary>
16 | 	/// Moved the constants (and pseude-enums) out of the TreeBuilder class.
17 | 	/// </summary>
18 | 	public class TreeBuilderConstants
19 | 	{
20 | 		/// <summary>
21 | 		/// Array version of U+FFFD.
22 | 		/// </summary>
23 | 		internal static readonly char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
24 | 
25 | 		// [NOCPP[
26 | 
27 | 		internal readonly static string[] HTML4_PUBLIC_IDS = {
28 | 			"-//W3C//DTD HTML 4.0 Frameset//EN",
29 | 			"-//W3C//DTD HTML 4.0 Transitional//EN",
30 | 			"-//W3C//DTD HTML 4.0//EN", "-//W3C//DTD HTML 4.01 Frameset//EN",
31 | 			"-//W3C//DTD HTML 4.01 Transitional//EN",
32 | 			"-//W3C//DTD HTML 4.01//EN" };
33 | 
34 | 		// ]NOCPP]
35 | 
36 | 		internal readonly static string[] QUIRKY_PUBLIC_IDS = {
37 | 			"+//silmaril//dtd html pro v0r11 19970101//",
38 | 			"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
39 | 			"-//as//dtd html 3.0 aswedit + extensions//",
40 | 			"-//ietf//dtd html 2.0 level 1//",
41 | 			"-//ietf//dtd html 2.0 level 2//",
42 | 			"-//ietf//dtd html 2.0 strict level 1//",
43 | 			"-//ietf//dtd html 2.0 strict level 2//",
44 | 			"-//ietf//dtd html 2.0 strict//",
45 | 			"-//ietf//dtd html 2.0//",
46 | 			"-//ietf//dtd html 2.1e//",
47 | 			"-//ietf//dtd html 3.0//",
48 | 			"-//ietf//dtd html 3.2 final//",
49 | 			"-//ietf//dtd html 3.2//",
50 | 			"-//ietf//dtd html 3//",
51 | 			"-//ietf//dtd html level 0//",
52 | 			"-//ietf//dtd html level 1//",
53 | 			"-//ietf//dtd html level 2//",
54 | 			"-//ietf//dtd html level 3//",
55 | 			"-//ietf//dtd html strict level 0//",
56 | 			"-//ietf//dtd html strict level 1//",
57 | 			"-//ietf//dtd html strict level 2//",
58 | 			"-//ietf//dtd html strict level 3//",
59 | 			"-//ietf//dtd html strict//",
60 | 			"-//ietf//dtd html//",
61 | 			"-//metrius//dtd metrius presentational//",
62 | 			"-//microsoft//dtd internet explorer 2.0 html strict//",
63 | 			"-//microsoft//dtd internet explorer 2.0 html//",
64 | 			"-//microsoft//dtd internet explorer 2.0 tables//",
65 | 			"-//microsoft//dtd internet explorer 3.0 html strict//",
66 | 			"-//microsoft//dtd internet explorer 3.0 html//",
67 | 			"-//microsoft//dtd internet explorer 3.0 tables//",
68 | 			"-//netscape comm. corp.//dtd html//",
69 | 			"-//netscape comm. corp.//dtd strict html//",
70 | 			"-//o'reilly and associates//dtd html 2.0//",
71 | 			"-//o'reilly and associates//dtd html extended 1.0//",
72 | 			"-//o'reilly and associates//dtd html extended relaxed 1.0//",
73 | 			"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
74 | 			"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
75 | 			"-//spyglass//dtd html 2.0 extended//",
76 | 			"-//sq//dtd html 2.0 hotmetal + extensions//",
77 | 			"-//sun microsystems corp.//dtd hotjava html//",
78 | 			"-//sun microsystems corp.//dtd hotjava strict html//",
79 | 			"-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//",
80 | 			"-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//",
81 | 			"-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//",
82 | 			"-//w3c//dtd html 4.0 transitional//",
83 | 			"-//w3c//dtd html experimental 19960712//",
84 | 			"-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//",
85 | 			"-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//",
86 | 			"-//webtechs//dtd mozilla html//" };
87 | 
88 | 		internal const int NOT_FOUND_ON_STACK = int.MaxValue;
89 | 
90 | 		// [NOCPP[
91 | 
92 | 		[Local]
93 | 		internal const string HTML_LOCAL = "html";
94 | 
95 | 		// ]NOCPP]
96 | 	}
97 | }
98 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Core/UTF16Buffer.cs:
--------------------------------------------------------------------------------
 1 | ﻿/*
 2 |  * Copyright (c) 2008-2010 Mozilla Foundation
 3 |  * Copyright (c) 2012 Patrick Reisert 
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a 
 6 |  * copy of this software and associated documentation files (the "Software"), 
 7 |  * to deal in the Software without restriction, including without limitation 
 8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 9 |  * and/or sell copies of the Software, and to permit persons to whom the 
10 |  * Software is furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in 
13 |  * all copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
18 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
20 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
21 |  * DEALINGS IN THE SOFTWARE.
22 |  */
23 | 
24 | namespace HtmlParserSharp.Core
25 | {
26 | 	/// <summary>
27 | 	/// An UTF-16 buffer that knows the start and end indeces of its unconsumed
28 | 	/// content.
29 | 	/// </summary>
30 | 	public sealed class UTF16Buffer
31 | 	{
32 | 		/// <summary>
33 | 		/// Gets the backing store of the buffer. May be larger than the logical content
34 | 		/// of this <code>UTF16Buffer</code>.
35 | 		/// </summary>
36 | 		public char[] Buffer { get; private set; }
37 | 
38 | 		/// <summary>
39 | 		/// Gets or sets the index of the first unconsumed character in the backing buffer.
40 | 		/// </summary>
41 | 		public int Start { get; set; }
42 | 
43 | 		/// <summary>
44 | 		/// Gets or sets the index of the slot immediately after the last character in the backing
45 | 		/// buffer that is part of the logical content of this <code>UTF16Buffer</code>.
46 | 		/// </summary>
47 | 		public int End { get; set; }
48 | 
49 | 		/// <summary>
50 | 		/// Constructor for wrapping an existing UTF-16 code unit array.
51 | 		/// </summary>
52 | 		/// <param name="buffer">The backing buffer.</param>
53 | 		/// <param name="start">The index of the first character to consume.</param>
54 | 		/// <param name="end">The index immediately after the last character to consume.</param>
55 | 		public UTF16Buffer(char[] buffer, int start, int end)
56 | 		{
57 | 			Buffer = buffer;
58 | 			Start = start;
59 | 			End = end;
60 | 		}
61 | 
62 | 		/// <summary>
63 | 		/// Determines whether this instance has data left.
64 | 		/// </summary>
65 | 		/// <returns>
66 | 		///   <c>true</c> if there's data left; otherwise, <c>false</c>.
67 | 		/// </returns>
68 | 		public bool HasMore
69 | 		{
70 | 			get
71 | 			{
72 | 				return Start < End;
73 | 			}
74 | 		}
75 | 
76 | 		/// <summary>
77 | 		/// Adjusts the start index to skip over the first character if it is a line
78 | 		/// feed and the previous character was a carriage return.
79 | 		/// </summary>
80 | 		/// <param name="lastWasCR">Whether the previous character was a carriage return.</param>
81 | 		public void Adjust(bool lastWasCR)
82 | 		{
83 | 			if (lastWasCR && Buffer[Start] == '\n')
84 | 			{
85 | 				Start++;
86 | 			}
87 | 		}
88 | 	}
89 | }
90 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/HtmlParserSharp.csproj:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <PropertyGroup>
 4 |     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
 5 |     <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
 6 |     <ProductVersion>8.0.30703</ProductVersion>
 7 |     <SchemaVersion>2.0</SchemaVersion>
 8 |     <ProjectGuid>{FD150915-D34F-436A-92C1-80AA505DA754}</ProjectGuid>
 9 |     <OutputType>Exe</OutputType>
10 |     <AppDesignerFolder>Properties</AppDesignerFolder>
11 |     <RootNamespace>HtmlParserSharp</RootNamespace>
12 |     <AssemblyName>HtmlParser</AssemblyName>
13 |     <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
14 |     <FileAlignment>512</FileAlignment>
15 |   </PropertyGroup>
16 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
17 |     <DebugSymbols>true</DebugSymbols>
18 |     <DebugType>full</DebugType>
19 |     <Optimize>false</Optimize>
20 |     <OutputPath>bin\Debug\</OutputPath>
21 |     <DefineConstants>DEBUG;TRACE</DefineConstants>
22 |     <ErrorReport>prompt</ErrorReport>
23 |     <WarningLevel>4</WarningLevel>
24 |   </PropertyGroup>
25 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
26 |     <DebugType>pdbonly</DebugType>
27 |     <Optimize>true</Optimize>
28 |     <OutputPath>bin\Release\</OutputPath>
29 |     <DefineConstants>TRACE</DefineConstants>
30 |     <ErrorReport>prompt</ErrorReport>
31 |     <WarningLevel>4</WarningLevel>
32 |   </PropertyGroup>
33 |   <PropertyGroup>
34 |     <StartupObject />
35 |   </PropertyGroup>
36 |   <ItemGroup>
37 |     <Reference Include="System" />
38 |     <Reference Include="System.Core" />
39 |     <Reference Include="System.Xml.Linq" />
40 |     <Reference Include="System.Data.DataSetExtensions" />
41 |     <Reference Include="Microsoft.CSharp" />
42 |     <Reference Include="System.Data" />
43 |     <Reference Include="System.Xml" />
44 |   </ItemGroup>
45 |   <ItemGroup>
46 |     <Compile Include="Core\AttributeName.cs" />
47 |     <Compile Include="Core\CharsetState.cs" />
48 |     <Compile Include="Core\CoalescingTreeBuilder.cs" />
49 |     <Compile Include="Common\DoctypeExpectation.cs" />
50 |     <Compile Include="Common\DocumentMode.cs" />
51 |     <Compile Include="Common\DocumentModeEventArgs.cs" />
52 |     <Compile Include="Common\EncodingDetectedEventArgs.cs" />
53 |     <Compile Include="Common\XmlViolationPolicy.cs" />
54 |     <Compile Include="Core\DispatchGroup.cs" />
55 |     <Compile Include="Core\DomTreeBuilder.cs" />
56 |     <Compile Include="Core\ElementName.cs" />
57 |     <Compile Include="Core\HtmlAttributes.cs" />
58 |     <Compile Include="Core\InsertionMode.cs" />
59 |     <Compile Include="Program.cs" />
60 |     <Compile Include="Core\ILocator.cs" />
61 |     <Compile Include="Common\ITokenHandler.cs" />
62 |     <Compile Include="Core\ITreeBuilderState.cs" />
63 |     <Compile Include="Core\Locator.cs" />
64 |     <Compile Include="Core\NamedCharacterAccel.cs" />
65 |     <Compile Include="Core\NamedCharacters.cs" />
66 |     <Compile Include="Core\NCName.cs" />
67 |     <Compile Include="Common\Attributes.cs" />
68 |     <Compile Include="Common\ParserErrorEventArgs.cs" />
69 |     <Compile Include="Core\Portability.cs" />
70 |     <Compile Include="Properties\AssemblyInfo.cs" />
71 |     <Compile Include="SimpleHtmlParser.cs" />
72 |     <Compile Include="Core\StackNode.cs" />
73 |     <Compile Include="Core\StateSnapshot.cs" />
74 |     <Compile Include="Core\TaintableLocator.cs" />
75 |     <Compile Include="Core\Tokenizer.cs" />
76 |     <Compile Include="Core\TreeBuilder.cs" />
77 |     <Compile Include="Core\TreeBuilderConstants.cs" />
78 |     <Compile Include="Core\UTF16Buffer.cs" />
79 |   </ItemGroup>
80 |   <ItemGroup>
81 |     <Service Include="{508349B6-6B84-4DF5-91F0-309BEEBAD82D}" />
82 |   </ItemGroup>
83 |   <ItemGroup>
84 |     <Content Include="SampleData\test.html">
85 |       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
86 |     </Content>
87 |   </ItemGroup>
88 |   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
89 |   <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
90 |        Other similar extension points exist, see Microsoft.Common.targets.
91 |   <Target Name="BeforeBuild">
92 |   </Target>
93 |   <Target Name="AfterBuild">
94 |   </Target>
95 |   -->
96 | </Project>


--------------------------------------------------------------------------------
/HtmlParserSharp/Program.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2012 Patrick Reisert
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  5 |  * copy of this software and associated documentation files (the "Software"), 
  6 |  * to deal in the Software without restriction, including without limitation 
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
  8 |  * and/or sell copies of the Software, and to permit persons to whom the 
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in 
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | using System;
 24 | using System.Collections.Generic;
 25 | using System.Diagnostics;
 26 | using System.IO;
 27 | using System.Linq;
 28 | using System.Xml.Linq;
 29 | 
 30 | namespace HtmlParserSharp
 31 | {
 32 | 	/// <summary>
 33 | 	/// Sample entry point for manual testing and benchmarks: times HTML parsing against reloading the saved XML.
 34 | 	/// </summary>
 35 | 	public class Program
 36 | 	{
 37 | 		static SimpleHtmlParser parser = new SimpleHtmlParser();
 38 | 
 39 | 		/// <summary>Yields the bundled sample document (SampleData/test.html) ten times.</summary>
 40 | 		private static IEnumerable<FileInfo> GetTestFiles()
 41 | 		{
 42 | 			//DirectoryInfo dir = new DirectoryInfo("SampleData");
 43 | 			//return dir.GetFiles("*.html", SearchOption.AllDirectories);
 44 | 			for (int i = 0; i < 10; i++)
 45 | 			{
 46 | 				yield return new FileInfo(Path.Combine("SampleData", "test.html"));
 47 | 			}
 48 | 		}
 49 | 
 50 | 		/// <summary>Parses every test file, reloads the saved XML with XDocument and prints both sets of timings.</summary>
 51 | 		public static void Main(string[] args)
 52 | 		{
 53 | 			//var fragment1 = parser.ParseStringFragment("<td>foo", "");
 54 | 			//var fragment2 = parser.ParseStringFragment("<td>foo", "table");
 55 | 			Stopwatch sw = new Stopwatch();
 56 | 			Console.Write("Parsing ... ");
 57 | 			var result = GetTestFiles().Select((file) =>
 58 | 				{
 59 | 					sw.Restart();
 60 | 					var doc = parser.Parse(file.FullName);
 61 | 					sw.Stop();
 62 | 					var parseTime = sw.Elapsed;
 63 | 					doc.Save("test.xml");
 64 | 					sw.Restart();
 65 | 					XDocument.Load("test.xml");
 66 | 					sw.Stop();
 67 | 					return new { Document = doc, Time = parseTime, ReparseTime = sw.Elapsed };
 68 | 				}
 69 | 				).ToList();
 70 | 
 71 | 			Console.WriteLine("done.");
 72 | 			Console.WriteLine("Found " + result.Count + " documents.");
 73 | 			Console.WriteLine();
 74 | 			PrintStatistics(result.Select(val => val.Time).ToList());
 75 | 			Console.WriteLine();
 76 | 			Console.WriteLine("=== Reparsing (XDocument) ===");
 77 | 			// note: reparsing using XmlDocument instead gives similar results
 78 | 			PrintStatistics(result.Select(val => val.ReparseTime).ToList());
 79 | 			Console.ReadKey();
 80 | 		}
 81 | 
 82 | 		/// <summary>Prints total, first, average, min and max of the timings; skips what the sample count cannot support.</summary>
 83 | 		private static void PrintStatistics(IList<TimeSpan> times)
 84 | 		{
 85 | 			TimeSpan total = times.Aggregate(new TimeSpan(), (passed, current) => passed + current);
 86 | 			PrintTime("Total", total);
 87 | 			if (times.Count == 0)
 88 | 			{
 89 | 				return; // First/Min/Max would throw on an empty list
 90 | 			}
 91 | 			PrintTime("First", times[0]);
 92 | 			PrintTime("Average", TimeSpan.FromTicks(total.Ticks / times.Count));
 93 | 			if (times.Count > 1) // with a single sample the divisor (Count - 1) would be zero
 94 | 			{
 95 | 				PrintTime("Average (without first)", TimeSpan.FromTicks((total.Ticks - times[0].Ticks) / (times.Count - 1)));
 96 | 			}
 97 | 			PrintTime("Min", times.Min());
 98 | 			PrintTime("Max", times.Max());
 99 | 		}
100 | 
101 | 		/// <summary>Writes the caption, then the time span and its total milliseconds on the next line.</summary>
102 | 		private static void PrintTime(string caption, TimeSpan time)
103 | 		{
104 | 			Console.WriteLine("{0}:\n  {1} ({2} ms)", caption, time.ToString(), time.TotalMilliseconds);
105 | 		}
106 | 	}
107 | }
108 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System.Reflection;
 2 | using System.Runtime.CompilerServices;
 3 | using System.Runtime.InteropServices;
 4 | 
 5 | // General information about an assembly is controlled through the following
 6 | // set of attributes. Change these attribute values to modify the information
 7 | // associated with an assembly.
 8 | [assembly: AssemblyTitle("HtmlParser")]
 9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("")]
12 | [assembly: AssemblyProduct("HtmlParser")]
13 | [assembly: AssemblyCopyright("Copyright ©  2012")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 | 
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 | 
22 | // The following GUID is for the ID of the type library if this project is exposed to COM
23 | [assembly: Guid("dd2311df-4aa1-4f09-8fff-751cd048e652")]
24 | 
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | //      Major Version
28 | //      Minor Version
29 | //      Build Number
30 | //      Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 | 


--------------------------------------------------------------------------------
/HtmlParserSharp/SampleData/test.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Boddlnagg/HtmlParserSharp/bd48da4f4ce3b6309e32677bd1bebafd42ba280b/HtmlParserSharp/SampleData/test.html


--------------------------------------------------------------------------------
/HtmlParserSharp/SimpleHtmlParser.cs:
--------------------------------------------------------------------------------
  1 | ﻿/*
  2 |  * Copyright (c) 2012 Patrick Reisert
  3 |  * Copyright (c) 2005, 2006, 2007 Henri Sivonen
  4 |  * Copyright (c) 2007-2008 Mozilla Foundation
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a 
  7 |  * copy of this software and associated documentation files (the "Software"), 
  8 |  * to deal in the Software without restriction, including without limitation 
  9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 10 |  * and/or sell copies of the Software, and to permit persons to whom the 
 11 |  * Software is furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in 
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 22 |  * DEALINGS IN THE SOFTWARE.
 23 |  */
 24 | 
 25 | using System;
 26 | using System.IO;
 27 | using System.Xml;
 28 | using HtmlParserSharp.Core;
 29 | 
 30 | namespace HtmlParserSharp
 31 | {
 32 | 	/// <summary>
 33 | 	/// This is a simple API for the parsing process.
 34 | 	/// Part of this is a port of the nu.validator.htmlparser.io.Driver class.
 35 | 	/// The parser currently ignores the encoding in the html source and parses everything as UTF-8.
 36 | 	/// </summary>
 37 | 	public class SimpleHtmlParser
 38 | 	{
 39 | 		private Tokenizer tokenizer;
 40 | 		private DomTreeBuilder treeBuilder;
 41 | 
 42 | 		/// <summary>Parses an HTML fragment held in a string.</summary>
 43 | 		/// <param name="str">The HTML source text.</param>
 44 | 		/// <param name="fragmentContext">Fragment context handed to the tree builder (e.g. "table").</param>
 45 | 		/// <returns>The document fragment produced by the tree builder.</returns>
 46 | 		public XmlDocumentFragment ParseStringFragment(string str, string fragmentContext)
 47 | 		{
 48 | 			using (var reader = new StringReader(str))
 49 | 				return ParseFragment(reader, fragmentContext);
 50 | 		}
 51 | 
 52 | 		/// <summary>Parses a complete HTML document held in a string.</summary>
 53 | 		/// <param name="str">The HTML source text.</param>
 54 | 		public XmlDocument ParseString(string str)
 55 | 		{
 56 | 			using (var reader = new StringReader(str))
 57 | 				return Parse(reader);
 58 | 		}
 59 | 
 60 | 		/// <summary>Parses the HTML file at the given path, read through a StreamReader.</summary>
 61 | 		/// <param name="path">Path of the file to parse.</param>
 62 | 		public XmlDocument Parse(string path)
 63 | 		{
 64 | 			using (var reader = new StreamReader(path))
 65 | 				return Parse(reader);
 66 | 		}
 67 | 
 68 | 		/// <summary>Parses a complete HTML document from a reader.</summary>
 69 | 		/// <param name="reader">Source of the HTML text; must not be null.</param>
 70 | 		/// <returns>The document built by the tree builder.</returns>
 71 | 		/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
 72 | 		public XmlDocument Parse(TextReader reader)
 73 | 		{
 74 | 			Reset();
 75 | 			Tokenize(reader);
 76 | 			return treeBuilder.Document;
 77 | 		}
 78 | 
 79 | 		/// <summary>Parses an HTML fragment from a reader.</summary>
 80 | 		/// <param name="reader">Source of the HTML text; must not be null.</param>
 81 | 		/// <param name="fragmentContext">Fragment context handed to the tree builder (e.g. "table").</param>
 82 | 		/// <returns>The document fragment produced by the tree builder.</returns>
 83 | 		/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
 84 | 		public XmlDocumentFragment ParseFragment(TextReader reader, string fragmentContext)
 85 | 		{
 86 | 			Reset();
 87 | 			treeBuilder.SetFragmentContext(fragmentContext);
 88 | 			Tokenize(reader);
 89 | 			return treeBuilder.getDocumentFragment();
 90 | 		}
 91 | 
 92 | 		/// <summary>Replaces the tree builder and tokenizer with new instances; called at the start of every parse.</summary>
 93 | 		private void Reset()
 94 | 		{
 95 | 			treeBuilder = new DomTreeBuilder();
 96 | 			tokenizer = new Tokenizer(treeBuilder, false);
 97 | 			treeBuilder.IsIgnoringComments = false;
 98 | 
 99 | 			// optionally: report errors and more
100 | 			//treeBuilder.ErrorEvent +=
101 | 			//    (sender, a) =>
102 | 			//    {
103 | 			//        ILocator loc = tokenizer as ILocator;
104 | 			//        Console.WriteLine("{0}: {1} (Line: {2})", a.IsWarning ? "Warning" : "Error", a.Message, loc.LineNumber);
105 | 			//    };
106 | 			//treeBuilder.DocumentModeDetected += (sender, a) => Console.WriteLine("Document mode: " + a.Mode.ToString());
107 | 			//tokenizer.EncodingDeclared += (sender, a) => Console.WriteLine("Encoding: " + a.Encoding + " (currently ignored)");
108 | 		}
109 | 
110 | 		/// <summary>Reads the input in 2048-char chunks and feeds them to the tokenizer, skipping a leading U+FEFF.</summary>
111 | 		/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
112 | 		private void Tokenize(TextReader reader)
113 | 		{
114 | 			if (reader == null)
115 | 			{
116 | 				throw new ArgumentNullException("reader"); // the single-string constructor expects the parameter name
117 | 			}
118 | 			tokenizer.Start();
119 | 			try
120 | 			{
121 | 				char[] buffer = new char[2048];
122 | 				UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0);
123 | 				bool lastWasCR = false;
124 | 				bool firstChunk = true;
125 | 				int streamOffset = 0; // chars handed to the tokenizer so far (a skipped BOM is not counted)
126 | 				int len;
127 | 				while ((len = reader.Read(buffer, 0, buffer.Length)) != 0)
128 | 				{
129 | 					int offset = 0;
130 | 					int length = len;
131 | 					int baseOffset = streamOffset;
132 | 					if (firstChunk && buffer[0] == '\uFEFF') // byte order mark: only checked on the first chunk
133 | 					{
134 | 						baseOffset = -1;
135 | 						offset = 1;
136 | 						length--;
137 | 					}
138 | 					firstChunk = false;
139 | 					if (length > 0) // false only when the first chunk consisted of nothing but the BOM
140 | 					{
141 | 						tokenizer.SetTransitionBaseOffset(baseOffset);
142 | 						bufr.Start = offset;
143 | 						bufr.End = offset + length;
144 | 						while (bufr.HasMore)
145 | 						{
146 | 							bufr.Adjust(lastWasCR);
147 | 							lastWasCR = false;
148 | 							if (bufr.HasMore)
149 | 							{
150 | 								lastWasCR = tokenizer.TokenizeBuffer(bufr);
151 | 							}
152 | 						}
153 | 					}
154 | 					streamOffset += length;
155 | 				}
156 | 				tokenizer.Eof();
157 | 			}
158 | 			finally
159 | 			{
160 | 				tokenizer.End(); // runs even when tokenizing throws
161 | 			}
162 | 		}
163 | 	}
164 | }
165 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | HtmlParserSharp
 2 | ===============
 3 | 
 4 | This is a manual C# port of the [Validator.nu HTML Parser](http://about.validator.nu/htmlparser/), an HTML5 parser originally written in Java and (compiled to C++ using the Google Web Toolkit) used by Mozilla's Gecko rendering engine. The port uses the DOM implemented in [System.Xml](http://msdn.microsoft.com/en-us/library/system.xml.aspx).
 5 | 
 6 | Status
 7 | ------
 8 | PLEASE SEE https://github.com/jamietre/HtmlParserSharp FOR AN ACTIVELY MAINTAINED VERSION OF THIS PROJECT.
 9 | 
10 | Currently the port is based on Validator.nu 1.3.1 and works as far as I have tested it. However, as there are no unit tests, I'm not sure whether every detail works correctly. Timing tests showed that it is quite fast: about 3-6 times slower than parsing XML using .NET's XDocument API, but XML parsing is easier to implement, so this is okay and it's still FAST.
11 | 
12 | What's missing
13 | --------------
14 | If you want to contribute, maybe you can start here:
15 | 
16 | * Support for character encodings other than UTF-8
17 | * More C#-ish coding style
18 | * Unit tests
19 | * Look for TODOs in the code
20 | 


--------------------------------------------------------------------------------