├── .gitignore ├── HtmlParserSharp.sln ├── HtmlParserSharp ├── Common │ ├── Attributes.cs │ ├── DoctypeExpectation.cs │ ├── DocumentMode.cs │ ├── DocumentModeEventArgs.cs │ ├── EncodingDetectedEventArgs.cs │ ├── ITokenHandler.cs │ ├── ParserErrorEventArgs.cs │ └── XmlViolationPolicy.cs ├── Core │ ├── AttributeName.cs │ ├── CharsetState.cs │ ├── CoalescingTreeBuilder.cs │ ├── DispatchGroup.cs │ ├── DomTreeBuilder.cs │ ├── ElementName.cs │ ├── HtmlAttributes.cs │ ├── ILocator.cs │ ├── ITreeBuilderState.cs │ ├── InsertionMode.cs │ ├── Locator.cs │ ├── NCName.cs │ ├── NamedCharacterAccel.cs │ ├── NamedCharacters.cs │ ├── Portability.cs │ ├── StackNode.cs │ ├── StateSnapshot.cs │ ├── TaintableLocator.cs │ ├── Tokenizer.cs │ ├── TreeBuilder.cs │ ├── TreeBuilderConstants.cs │ └── UTF16Buffer.cs ├── HtmlParserSharp.csproj ├── Program.cs ├── Properties │ └── AssemblyInfo.cs ├── SampleData │ └── test.html └── SimpleHtmlParser.cs └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.suo 2 | obj/ 3 | bin/ -------------------------------------------------------------------------------- /HtmlParserSharp.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HtmlParserSharp", "HtmlParserSharp\HtmlParserSharp.csproj", "{FD150915-D34F-436A-92C1-80AA505DA754}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Any CPU = Debug|Any CPU 9 | Release|Any CPU = Release|Any CPU 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {FD150915-D34F-436A-92C1-80AA505DA754}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 13 | {FD150915-D34F-436A-92C1-80AA505DA754}.Debug|Any CPU.Build.0 = Debug|Any CPU 14 | {FD150915-D34F-436A-92C1-80AA505DA754}.Release|Any CPU.ActiveCfg = 
Release|Any CPU 15 | {FD150915-D34F-436A-92C1-80AA505DA754}.Release|Any CPU.Build.0 = Release|Any CPU 16 | EndGlobalSection 17 | GlobalSection(SolutionProperties) = preSolution 18 | HideSolutionNode = FALSE 19 | EndGlobalSection 20 | EndGlobal 21 | -------------------------------------------------------------------------------- /HtmlParserSharp/Common/Attributes.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Patrick Reisert 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | using System; 24 | 25 | namespace HtmlParserSharp.Common 26 | { 27 | // This file contains the attributes that correspond to the annotations 28 | // @NsUri, @Prefix and @Local in the Java code. Probably we can safely remove these. 
29 | /* Marker attribute standing in for the Java @NsUri annotation; it declares no members and may be applied to fields, parameters and methods. */ 30 | [AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Method)] 31 | public class NsUriAttribute : Attribute { } 32 | /* Marker attribute standing in for the Java @Prefix annotation; it declares no members and may be applied to fields, parameters and methods. */ 33 | [AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Method)] 34 | public class PrefixAttribute : Attribute { } 35 | /* Marker attribute standing in for the Java @Local annotation; it declares no members and may be applied to fields, parameters and methods. */ 36 | [AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Method)] 37 | public class LocalAttribute : Attribute { } 38 | } 39 | -------------------------------------------------------------------------------- /HtmlParserSharp/Common/DoctypeExpectation.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2012 Patrick Reisert 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | * DEALINGS IN THE SOFTWARE. 
22 | */ 23 | 24 | using System; 25 | using System.Collections.Generic; 26 | using System.Linq; 27 | using System.Text; 28 | 29 | namespace HtmlParserSharp.Common 30 | { 31 | /// <summary> 32 | /// Used for indicating desired behavior with legacy doctypes. 33 | /// </summary> 34 | public enum DoctypeExpectation 35 | { 36 | /// <summary> 37 | /// Be a pure HTML5 parser. 38 | /// </summary> 39 | Html, 40 | 41 | /// <summary> 42 | /// Require the HTML 4.01 Transitional public id. Turn on HTML4-specific 43 | /// additional errors regardless of doctype. 44 | /// </summary> 45 | Html401Transitional, 46 | 47 | /// <summary> 48 | /// Require the HTML 4.01 Transitional public id and a system id. Turn on 49 | /// HTML4-specific additional errors regardless of doctype. (NOTE(review): this text names the Transitional public id although the member is Html401Strict; the wording may be a copy-paste slip. Confirm against the doctype checks.) 50 | /// </summary> 51 | Html401Strict, 52 | 53 | /// <summary> 54 | /// Treat the doctype required by HTML 5, doctypes with the HTML 4.01 Strict 55 | /// public id and doctypes with the HTML 4.01 Transitional public id and a 56 | /// system id as non-errors. Turn on HTML4-specific additional errors if the 57 | /// public id is the HTML 4.01 Strict or Transitional public id. 58 | /// </summary> 59 | Auto, 60 | 61 | /// <summary> 62 | /// Never enable HTML4-specific error checks. Never report any doctype 63 | /// condition as an error. (Doctype tokens in wrong places will be 64 | /// reported as errors, though.) The application may decide what to log 65 | /// in response to calls to DocumentModeHandler. This mode 66 | /// is meant for doing surveys on existing content. 
67 | /// </summary> 68 | NoDoctypeErrors 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /HtmlParserSharp/Common/DocumentMode.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2008 Mozilla Foundation 4 | * Copyright (c) 2012 Patrick Reisert 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | * DEALINGS IN THE SOFTWARE. 23 | */ 24 | /* DocumentMode: the document mode values (standards, almost standards, quirks) used as the type of DocumentModeEventArgs.Mode. */ 25 | namespace HtmlParserSharp.Common 26 | { 27 | public enum DocumentMode 28 | { 29 | /// <summary> 30 | /// The Standards Mode 31 | /// </summary> 32 | StandardsMode, 33 | 34 | /// <summary> 35 | /// The Limited Quirks Mode aka. 
The Almost Standards Mode 36 | /// </summary> 37 | AlmostStandardsMode, 38 | 39 | /// <summary> 40 | /// The Quirks Mode 41 | /// </summary> 42 | /// 43 | QuirksMode 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /HtmlParserSharp/Common/DocumentModeEventArgs.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Patrick Reisert 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | using System; 24 | /* DocumentModeEventArgs: event data holding a document mode, the doctype's public and system identifiers, and a flag saying whether HTML 4-specific checks were enabled. All properties have private setters and are assigned only by the constructor. */ 25 | namespace HtmlParserSharp.Common 26 | { 27 | public class DocumentModeEventArgs : EventArgs 28 | { 29 | public DocumentMode Mode { get; private set; } 30 | public string PublicIdentifier { get; private set; } 31 | public string SystemIdentifier { get; private set; } 32 | public bool Html4SpecificAdditionalErrorChecks { get; private set; } 33 | 34 | /// <summary> 35 | /// Creates the event data that notifies a receiver of the document mode. 36 | /// </summary> 37 | /// <param name="mode">The document mode.</param> 38 | /// <param name="publicIdentifier">The public identifier of the doctype or <c>null</c> if unavailable.</param> 39 | /// <param name="systemIdentifier">The system identifier of the doctype or <c>null</c> if unavailable.</param> 40 | /// <param name="html4SpecificAdditionalErrorChecks"><c>true</c> if HTML 4-specific checks were enabled, 41 | /// <c>false</c> otherwise</param> 42 | public DocumentModeEventArgs(DocumentMode mode, string publicIdentifier, string systemIdentifier, bool html4SpecificAdditionalErrorChecks) 43 | { 44 | Mode = mode; 45 | PublicIdentifier = publicIdentifier; 46 | SystemIdentifier = systemIdentifier; 47 | Html4SpecificAdditionalErrorChecks = html4SpecificAdditionalErrorChecks; 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /HtmlParserSharp/Common/EncodingDetectedEventArgs.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Patrick Reisert 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | using System; 24 | /* EncodingDetectedEventArgs: event data carrying the name of an encoding (read-only to callers) plus a publicly settable AcceptEncoding flag. */ 25 | namespace HtmlParserSharp.Common 26 | { 27 | public class EncodingDetectedEventArgs : EventArgs 28 | { 29 | public string Encoding { get; private set; } 30 | public bool AcceptEncoding { get; set; } 31 | /* Stores the encoding name and starts with AcceptEncoding = false. Presumably a subscriber sets the flag to true to accept the encoding; confirm where the event is raised. */ 32 | public EncodingDetectedEventArgs(string encoding) 33 | { 34 | Encoding = encoding; 35 | AcceptEncoding = false; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /HtmlParserSharp/Common/ITokenHandler.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2008-2010 Mozilla Foundation 4 | * Copyright (c) 2012 Patrick Reisert 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | * DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | using HtmlParserSharp.Core; 26 | 27 | namespace HtmlParserSharp.Common 28 | { 29 | /// <summary> 30 | /// Tokenizer reports tokens through this interface. 31 | /// </summary> 32 | public interface ITokenHandler 33 | { 34 | 35 | /// <summary> 36 | /// This method is called at the start of tokenization before any other 37 | /// methods on this interface are called. Implementations should hold the 38 | /// reference to the <c>Tokenizer</c> in order to set the content 39 | /// model flag and in order to be able to query for <c>Locator</c> data. 40 | /// </summary> 41 | /// <param name="self">The <c>Tokenizer</c>.</param> 42 | void StartTokenization(Tokenizer self); 43 | 44 | /// <summary> 45 | /// If this handler implementation cares about comments, return <c>true</c>. 46 | /// If not, return <c>false</c>. 47 | /// </summary> 48 | /// <value>Whether this handler wants comments</value> 49 | bool WantsComments { get; } 50 | 51 | /// <summary> 52 | /// Receive a doctype token. 53 | /// </summary> 54 | /// <param name="name">The name.</param> 55 | /// <param name="publicIdentifier">The public identifier.</param> 56 | /// <param name="systemIdentifier">The system identifier.</param> 57 | /// <param name="forceQuirks">Whether the token is correct. NOTE(review): the parameter name suggests a force-quirks flag, which would read the other way round; confirm the polarity at the call site.</param> 58 | void Doctype(string name, string publicIdentifier, string systemIdentifier, bool forceQuirks); 59 | 60 | /// <summary> 61 | /// Receive a start tag token. 62 | /// </summary> 63 | /// <param name="eltName">The tag name.</param> 64 | /// <param name="attributes">The attributes.</param> 65 | /// <param name="selfClosing">TODO: undocumented in the original; presumably whether the tag used self-closing syntax (confirm).</param> 66 | void StartTag(ElementName eltName, HtmlAttributes attributes, bool selfClosing); 67 | 68 | /// <summary> 69 | /// Receive an end tag token. 70 | /// </summary> 71 | /// <param name="eltName">The tag name.</param> 72 | void EndTag(ElementName eltName); 73 | 74 | /// <summary> 75 | /// Receive a comment token. 
The data is junk if thewantsComments() 76 | /// returned false (that is, when the <c>WantsComments</c> property is <c>false</c>). 77 | /// </summary> 78 | /// <param name="buf">The buffer holding the data.</param> 79 | /// <param name="start">The offset into the buffer.</param> 80 | /// <param name="length">The number of code units to read.</param> 81 | void Comment(char[] buf, int start, int length); 82 | 83 | /// <summary> 84 | /// Receive character tokens. This method has the same semantics as the SAX 85 | /// method of the same name. 86 | /// </summary> 87 | /// <param name="buf">A buffer holding the data.</param> 88 | /// <param name="start">The offset into the buffer.</param> 89 | /// <param name="length">The number of code units to read.</param> 90 | void Characters(char[] buf, int start, int length); 91 | 92 | /// <summary> 93 | /// Reports a U+0000 that's being turned into a U+FFFD. 94 | /// </summary> 95 | void ZeroOriginatingReplacementCharacter(); 96 | 97 | /// <summary> 98 | /// The end-of-file token. 99 | /// </summary> 100 | void Eof(); 101 | 102 | /// <summary> 103 | /// Perform final cleanup. 104 | /// </summary> 105 | void EndTokenization(); 106 | 107 | /// <summary> 108 | /// Checks if the CDATA sections are allowed. 109 | /// </summary> 110 | /// <value><c>true</c> if CDATA sections are allowed</value> 111 | bool IsCDataSectionAllowed { get; } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /HtmlParserSharp/Common/ParserErrorEventArgs.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Patrick Reisert 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | using System; 24 | /* ParserErrorEventArgs: event data carrying a message string and a flag marking the report as a warning. Both properties have private setters and are assigned only by the constructor. */ 25 | namespace HtmlParserSharp.Common 26 | { 27 | public class ParserErrorEventArgs : EventArgs 28 | { 29 | public string Message { get; private set; } 30 | public bool IsWarning { get; private set; } 31 | /* Stores the message and the warning flag exactly as given; no validation is performed. */ 32 | public ParserErrorEventArgs(string message, bool isWarning) 33 | { 34 | Message = message; 35 | IsWarning = isWarning; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /HtmlParserSharp/Common/XmlViolationPolicy.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2012 Patrick Reisert 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | * DEALINGS IN THE SOFTWARE. 22 | */ 23 | 24 | using System; 25 | using System.Collections.Generic; 26 | using System.Linq; 27 | using System.Text; 28 | 29 | namespace HtmlParserSharp.Common 30 | { 31 | /// <summary> 32 | /// Policy for XML 1.0 violations. 33 | /// </summary> 34 | /// 35 | public enum XmlViolationPolicy 36 | { 37 | /// <summary> 38 | /// Conform to HTML 5, allow XML 1.0 to be violated. 39 | /// </summary> 40 | Allow, 41 | 42 | /// <summary> 43 | /// Halt when something cannot be mapped to XML 1.0. 44 | /// </summary> 45 | Fatal, 46 | 47 | /// <summary> 48 | /// Be non-conforming and alter the infoset to fit 49 | /// XML 1.0 when something would otherwise not be 50 | /// mappable to XML 1.0. 51 | /// </summary> 52 | AlterInfoset 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /HtmlParserSharp/Core/CharsetState.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2007-2011 Mozilla Foundation 4 | * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla 5 | * Foundation, and Opera Software ASA. 
6 | * Copyright (c) 2012 Patrick Reisert 7 | * 8 | * Permission is hereby granted, free of charge, to any person obtaining a 9 | * copy of this software and associated documentation files (the "Software"), 10 | * to deal in the Software without restriction, including without limitation 11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 | * and/or sell copies of the Software, and to permit persons to whom the 13 | * Software is furnished to do so, subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be included in 16 | * all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | * DEALINGS IN THE SOFTWARE. 
25 | */ 26 | /* CharsetState: consecutively numbered states (0-11). After the initial state, the member names spell out the letters C-H-A-R-S-E-T, followed by an equals-sign state and single-quoted, double-quoted and unquoted states. Presumably drives a scanner for a "charset=..." declaration; confirm at the usage site. */ 27 | namespace HtmlParserSharp.Core 28 | { 29 | public enum CharsetState 30 | { 31 | CHARSET_INITIAL = 0, 32 | 33 | CHARSET_C = 1, 34 | 35 | CHARSET_H = 2, 36 | 37 | CHARSET_A = 3, 38 | 39 | CHARSET_R = 4, 40 | 41 | CHARSET_S = 5, 42 | 43 | CHARSET_E = 6, 44 | 45 | CHARSET_T = 7, 46 | 47 | CHARSET_EQUALS = 8, 48 | 49 | CHARSET_SINGLE_QUOTED = 9, 50 | 51 | CHARSET_DOUBLE_QUOTED = 10, 52 | 53 | CHARSET_UNQUOTED = 11 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /HtmlParserSharp/Core/CoalescingTreeBuilder.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2008-2010 Mozilla Foundation 3 | * Copyright (c) 2012 Patrick Reisert 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | * DEALINGS IN THE SOFTWARE. 
22 | */ 23 | 24 | using System; 25 | 26 | namespace HtmlParserSharp.Core 27 | { 28 | /// <summary> 29 | /// A common superclass for tree builders that coalesce their text nodes. 30 | /// </summary> <typeparam name="T">The type used for the parent, table and stack-parent arguments; constrained to reference types.</typeparam> 31 | public abstract class CoalescingTreeBuilder<T> : TreeBuilder<T> where T : class 32 | { 33 | protected override void AccumulateCharacters(char[] buf, int start, int length) 34 | { 35 | int newLen = charBufferLen + length; // length of the accumulated text after this append 36 | if (newLen > charBuffer.Length) // grow to exactly the required size when the buffer is too small 37 | { 38 | char[] newBuf = new char[newLen]; 39 | Array.Copy(charBuffer, newBuf, charBufferLen); 40 | charBuffer = null; // release the old buffer in C++ (has no effect here: the field is reassigned on the next line) 41 | charBuffer = newBuf; 42 | } 43 | Array.Copy(buf, start, charBuffer, charBufferLen, length); 44 | charBufferLen = newLen; 45 | } 46 | /// <summary>Converts the buffer slice to a string and forwards to the string overload.</summary> 47 | override protected void AppendCharacters(T parent, char[] buf, int start, int length) 48 | { 49 | AppendCharacters(parent, new String(buf, start, length)); 50 | } 51 | 52 | /// <summary>Appends the fixed isindex prompt text to <paramref name="parent"/> through the string overload.</summary> 53 | override protected void AppendIsindexPrompt(T parent) 54 | { 55 | AppendCharacters(parent, "This is a searchable index. 
Enter search keywords: "); 56 | } 57 | /* Appends text under parent; implemented by the concrete tree builder. */ 58 | protected abstract void AppendCharacters(T parent, string text); 59 | /* Converts the buffer slice to a string and forwards to the string overload. */ 60 | override protected void AppendComment(T parent, char[] buf, int start, int length) 61 | { 62 | AppendComment(parent, new String(buf, start, length)); 63 | } 64 | /* Appends the comment text under parent; implemented by the concrete tree builder. */ 65 | protected abstract void AppendComment(T parent, string comment); 66 | /* Converts the buffer slice to a string and forwards to the string overload. */ 67 | override protected void AppendCommentToDocument(char[] buf, int start, int length) 68 | { 69 | // Materialise the slice as a string and delegate to the string overload. 70 | AppendCommentToDocument(new String(buf, start, length)); 71 | } 72 | 73 | protected abstract void AppendCommentToDocument(string comment); 74 | 75 | override protected void InsertFosterParentedCharacters(char[] buf, int start, 76 | int length, T table, T stackParent) 77 | { 78 | InsertFosterParentedCharacters(new String(buf, start, length), table, stackParent); 79 | } 80 | 81 | protected abstract void InsertFosterParentedCharacters(string text, T table, T stackParent); 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /HtmlParserSharp/Core/DispatchGroup.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2007-2011 Mozilla Foundation 4 | * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla 5 | * Foundation, and Opera Software ASA. 
6 | * Copyright (c) 2012 Patrick Reisert 7 | * 8 | * Permission is hereby granted, free of charge, to any person obtaining a 9 | * copy of this software and associated documentation files (the "Software"), 10 | * to deal in the Software without restriction, including without limitation 11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 | * and/or sell copies of the Software, and to permit persons to whom the 13 | * Software is furnished to do so, subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be included in 16 | * all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | * DEALINGS IN THE SOFTWARE. 
25 | */ 26 | /* DispatchGroup: numeric identifiers named after HTML, MathML and SVG element names. Members joined with _OR_ give several element names one shared identifier, and OTHER (0) is the catch-all. Presumably the tree builder switches on these values; confirm at the usage site. */ 27 | namespace HtmlParserSharp.Core 28 | { 29 | public enum DispatchGroup 30 | { 31 | OTHER = 0, 32 | 33 | A = 1, 34 | 35 | BASE = 2, 36 | 37 | BODY = 3, 38 | 39 | BR = 4, 40 | 41 | BUTTON = 5, 42 | 43 | CAPTION = 6, 44 | 45 | COL = 7, 46 | 47 | COLGROUP = 8, 48 | 49 | FORM = 9, 50 | 51 | FRAME = 10, 52 | 53 | FRAMESET = 11, 54 | 55 | IMAGE = 12, 56 | 57 | INPUT = 13, 58 | 59 | ISINDEX = 14, 60 | 61 | LI = 15, 62 | 63 | LINK_OR_BASEFONT_OR_BGSOUND = 16, 64 | 65 | MATH = 17, 66 | 67 | META = 18, 68 | 69 | SVG = 19, 70 | 71 | HEAD = 20, 72 | /* the value 21 is not assigned to any member */ 73 | HR = 22, 74 | 75 | HTML = 23, 76 | 77 | NOBR = 24, 78 | 79 | NOFRAMES = 25, 80 | 81 | NOSCRIPT = 26, 82 | 83 | OPTGROUP = 27, 84 | 85 | OPTION = 28, 86 | 87 | P = 29, 88 | 89 | PLAINTEXT = 30, 90 | 91 | SCRIPT = 31, 92 | 93 | SELECT = 32, 94 | 95 | STYLE = 33, 96 | 97 | TABLE = 34, 98 | 99 | TEXTAREA = 35, 100 | 101 | TITLE = 36, 102 | 103 | TR = 37, 104 | 105 | XMP = 38, 106 | 107 | TBODY_OR_THEAD_OR_TFOOT = 39, 108 | 109 | TD_OR_TH = 40, 110 | 111 | DD_OR_DT = 41, 112 | 113 | H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 = 42, 114 | 115 | MARQUEE_OR_APPLET = 43, 116 | 117 | PRE_OR_LISTING = 44, 118 | 119 | B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U = 45, 120 | 121 | UL_OR_OL_OR_DL = 46, 122 | 123 | IFRAME = 47, 124 | 125 | EMBED_OR_IMG = 48, 126 | 127 | AREA_OR_WBR = 49, 128 | 129 | DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU = 50, 130 | 131 | ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY = 51, 132 | 133 | RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR = 52, 134 | 135 | RT_OR_RP = 53, 136 | 137 | COMMAND = 54, 138 | 139 | PARAM_OR_SOURCE_OR_TRACK = 55, 140 | 141 | MGLYPH_OR_MALIGNMARK = 56, 142 | 143 | MI_MO_MN_MS_MTEXT = 57, 144 | 145 | ANNOTATION_XML = 58, 146 | 147 | FOREIGNOBJECT_OR_DESC = 59, 148 | 149 | NOEMBED = 60, 150 | 151 | FIELDSET = 61, 152 | 153 | OUTPUT_OR_LABEL = 62, 154 | 155 | OBJECT = 63, 156 | 157 | FONT = 64, 
158 | 159 | KEYGEN = 65 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /HtmlParserSharp/Core/DomTreeBuilder.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2008-2010 Mozilla Foundation 4 | * Copyright (c) 2012 Patrick Reisert 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | * DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | using System; 26 | using System.Collections.Generic; 27 | using System.Linq; 28 | using System.Text; 29 | using System.Xml; 30 | using HtmlParserSharp.Common; 31 | using HtmlParserSharp.Core; 32 | 33 | namespace HtmlParserSharp 34 | { 35 | /// 36 | /// The tree builder glue for building a tree through the public DOM APIs. 37 | /// 38 | class DomTreeBuilder : CoalescingTreeBuilder 39 | { 40 | /// 41 | /// The current doc. 
42 | /// 43 | private XmlDocument document; 44 | 45 | override protected void AddAttributesToElement(XmlElement element, HtmlAttributes attributes) { 46 | for (int i = 0; i < attributes.Length; i++) { 47 | String localName = attributes.GetLocalName(i); 48 | String uri = attributes.GetURI(i); 49 | if (!element.HasAttribute(localName, uri)) { 50 | element.SetAttribute(localName, uri, 51 | attributes.GetValue(i)); 52 | } 53 | } 54 | } 55 | 56 | override protected void AppendCharacters(XmlElement parent, string text) 57 | { 58 | XmlNode lastChild = parent.LastChild; 59 | if (lastChild != null && lastChild.NodeType == XmlNodeType.Text) { 60 | XmlText lastAsText = (XmlText) lastChild; 61 | lastAsText.Data += text; 62 | return; 63 | } 64 | parent.AppendChild(document.CreateTextNode(text)); 65 | } 66 | 67 | override protected void AppendChildrenToNewParent(XmlElement oldParent, XmlElement newParent) { 68 | while (oldParent.HasChildNodes) { 69 | newParent.AppendChild(oldParent.FirstChild); 70 | } 71 | } 72 | 73 | protected override void AppendDoctypeToDocument(string name, string publicIdentifier, string systemIdentifier) 74 | { 75 | // TODO: this method was not there originally. is it correct? 
76 | document.XmlResolver = null; 77 | 78 | if (publicIdentifier == String.Empty) 79 | publicIdentifier = null; 80 | if (systemIdentifier == String.Empty) 81 | systemIdentifier = null; 82 | 83 | var doctype = document.CreateDocumentType(name, publicIdentifier, systemIdentifier, null); 84 | document.XmlResolver = new XmlUrlResolver(); 85 | document.AppendChild(doctype); 86 | } 87 | 88 | override protected void AppendComment(XmlElement parent, String comment) 89 | { 90 | parent.AppendChild(document.CreateComment(comment)); 91 | } 92 | 93 | override protected void AppendCommentToDocument(String comment) 94 | { 95 | document.AppendChild(document.CreateComment(comment)); 96 | } 97 | 98 | override protected XmlElement CreateElement(string ns, string name, HtmlAttributes attributes) 99 | { 100 | XmlElement rv = document.CreateElement(name, ns); 101 | for (int i = 0; i < attributes.Length; i++) 102 | { 103 | rv.SetAttribute(attributes.GetLocalName(i), attributes.GetURI(i), attributes.GetValue(i)); 104 | if (attributes.GetType(i) == "ID") 105 | { 106 | //rv.setIdAttributeNS(null, attributes.GetLocalName(i), true); // FIXME 107 | } 108 | } 109 | return rv; 110 | } 111 | 112 | override protected XmlElement CreateHtmlElementSetAsRoot(HtmlAttributes attributes) 113 | { 114 | XmlElement rv = document.CreateElement("html", "http://www.w3.org/1999/xhtml"); 115 | for (int i = 0; i < attributes.Length; i++) { 116 | rv.SetAttribute(attributes.GetLocalName(i), attributes.GetURI(i), attributes.GetValue(i)); 117 | } 118 | document.AppendChild(rv); 119 | return rv; 120 | } 121 | 122 | override protected void AppendElement(XmlElement child, XmlElement newParent) 123 | { 124 | newParent.AppendChild(child); 125 | } 126 | 127 | override protected bool HasChildren(XmlElement element) 128 | { 129 | return element.HasChildNodes; 130 | } 131 | 132 | override protected XmlElement CreateElement(string ns, string name, HtmlAttributes attributes, XmlElement form) { 133 | XmlElement rv = 
CreateElement(ns, name, attributes); 134 | //rv.setUserData("nu.validator.form-pointer", form, null); // TODO 135 | return rv; 136 | } 137 | 138 | override protected void Start(bool fragment) { 139 | document = new XmlDocument(); // implementation.createDocument(null, null, null); 140 | // TODO: fragment? 141 | } 142 | 143 | protected override void ReceiveDocumentMode(DocumentMode mode, String publicIdentifier, 144 | String systemIdentifier, bool html4SpecificAdditionalErrorChecks) 145 | { 146 | //document.setUserData("nu.validator.document-mode", mode, null); // TODO 147 | } 148 | 149 | /// 150 | /// Returns the document. 151 | /// 152 | /// The document 153 | internal XmlDocument Document 154 | { 155 | get 156 | { 157 | return document; 158 | } 159 | } 160 | 161 | /// 162 | /// Return the document fragment. 163 | /// 164 | /// The document fragment 165 | internal XmlDocumentFragment getDocumentFragment() { 166 | XmlDocumentFragment rv = document.CreateDocumentFragment(); 167 | XmlNode rootElt = document.FirstChild; 168 | while (rootElt.HasChildNodes) { 169 | rv.AppendChild(rootElt.FirstChild); 170 | } 171 | document = null; 172 | return rv; 173 | } 174 | 175 | override protected void InsertFosterParentedCharacters(string text, XmlElement table, XmlElement stackParent) { 176 | XmlNode parent = table.ParentNode; 177 | if (parent != null) { // always an element if not null 178 | XmlNode previousSibling = table.PreviousSibling; 179 | if (previousSibling != null 180 | && previousSibling.NodeType == XmlNodeType.Text) { 181 | XmlText lastAsText = (XmlText) previousSibling; 182 | lastAsText.Data += text; 183 | return; 184 | } 185 | parent.InsertBefore(document.CreateTextNode(text), table); 186 | return; 187 | } 188 | XmlNode lastChild = stackParent.LastChild; 189 | if (lastChild != null && lastChild.NodeType == XmlNodeType.Text) { 190 | XmlText lastAsText = (XmlText) lastChild; 191 | lastAsText.Data += text; 192 | return; 193 | } 194 | 
stackParent.AppendChild(document.CreateTextNode(text)); 195 | } 196 | 197 | override protected void InsertFosterParentedChild(XmlElement child, XmlElement table, XmlElement stackParent) { 198 | XmlNode parent = table.ParentNode; 199 | if (parent != null) { // always an element if not null 200 | parent.InsertBefore(child, table); 201 | } else { 202 | stackParent.AppendChild(child); 203 | } 204 | } 205 | 206 | override protected void DetachFromParent(XmlElement element) 207 | { 208 | XmlNode parent = element.ParentNode; 209 | if (parent != null) { 210 | parent.RemoveChild(element); 211 | } 212 | } 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /HtmlParserSharp/Core/ElementName.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2008-2011 Mozilla Foundation 3 | * Copyright (c) 2012 Patrick Reisert 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

using System;
using HtmlParserSharp.Common;

namespace HtmlParserSharp.Core
{

    public sealed class ElementName
    // uncomment when regenerating self
    // implements Comparable
    {

        /// <summary>
        /// The mask for extracting the dispatch group.
        /// </summary>
        public const int GROUP_MASK = 127;

        /// <summary>
        /// Indicates that the element is not a pre-interned element. Forbidden
        /// on preinterned elements.
        /// </summary>
        public const int CUSTOM = (1 << 30);

        /// <summary>
        /// Indicates that the element is in the "special" category. This bit
        /// should not be pre-set on MathML or SVG specials--only on HTML specials.
        /// </summary>
        public const int SPECIAL = (1 << 29);

        /// <summary>
        /// The element is foster-parenting. This bit should be pre-set on elements
        /// that are foster-parenting as HTML.
        /// </summary>
        public const int FOSTER_PARENTING = (1 << 28);

        /// <summary>
        /// The element is scoping. This bit should be pre-set on elements
        /// that are scoping as HTML.
        /// </summary>
        public const int SCOPING = (1 << 27);

        /// <summary>
        /// The element is scoping as SVG.
        /// </summary>
        public const int SCOPING_AS_SVG = (1 << 26);

        /// <summary>
        /// The element is scoping as MathML.
        /// </summary>
        public const int SCOPING_AS_MATHML = (1 << 25);

        /// <summary>
        /// The element is an HTML integration point.
        /// </summary>
        public const int HTML_INTEGRATION_POINT = (1 << 24);

        /// <summary>
        /// The element has an optional end tag.
        /// </summary>
        public const int OPTIONAL_END_TAG = (1 << 23);

        /// <summary>
        /// A custom element name whose <see cref="name"/> is null.
        /// </summary>
        public static readonly ElementName NULL_ELEMENT_NAME = new ElementName(null);

        [Local]
        public readonly string name;

        [Local]
        public readonly string camelCaseName;

        /// <summary>
        /// The lowest 7 bits are the dispatch group. The high bits are flags.
        /// </summary>
        public readonly int flags;

        /// <summary>
        /// The raw flag word (dispatch group in the low bits, flag bits above).
        /// </summary>
        public int Flags
        {
            get
            {
                return flags;
            }
        }

        /// <summary>
        /// The dispatch group, i.e. <see cref="flags"/> masked with <see cref="GROUP_MASK"/>.
        /// </summary>
        public DispatchGroup Group
        {
            get
            {
                return (DispatchGroup)(flags & GROUP_MASK);
            }
        }

        // [NOCPP[

        /// <summary>
        /// Whether the <see cref="CUSTOM"/> bit is set, i.e. this is not a pre-interned name.
        /// </summary>
        public bool IsCustom
        {
            get
            {
                return (flags & CUSTOM) != 0;
            }
        }

        // ]NOCPP]

        /// <summary>
        /// Finds the element name for the <paramref name="length"/> characters of
        /// <paramref name="buf"/> starting at <paramref name="offset"/>.
        /// </summary>
        /// <param name="buf">Buffer holding the (lower-cased) element name.</param>
        /// <param name="offset">Index of the first character of the name in <paramref name="buf"/>.</param>
        /// <param name="length">Number of characters in the name.</param>
        /// <returns>
        /// The pre-interned <see cref="ElementName"/> when the hash is found in
        /// <c>ELEMENT_HASHES</c> and the name matches the buffer contents; otherwise
        /// a new custom <see cref="ElementName"/> built from the buffer.
        /// </returns>
        internal static ElementName ElementNameByBuffer(char[] buf, int offset, int length)
        {
            // Hash the characters of the name itself: the hash must be taken relative
            // to offset, otherwise a name that does not start at index 0 never matches.
            int hash = ElementName.BufToHash(buf, offset, length);
            int index = Array.BinarySearch(ElementName.ELEMENT_HASHES, hash);
            if (index < 0)
            {
                return new ElementName(Portability.NewLocalNameFromBuffer(buf, offset, length));
            }

            ElementName elementName = ElementName.ELEMENT_NAMES[index];
            /*[Local]*/
            string name = elementName.name;
            // Equal hashes do not guarantee equal names for arbitrary input, so verify.
            if (!Portability.LocalEqualsBuffer(name, buf, offset, length))
            {
                return new ElementName(Portability.NewLocalNameFromBuffer(buf, offset, length));
            }
            return elementName;
        }

        /// <summary>
        /// This method has to return a unique integer for each well-known
        /// lower-cased element name. Hashes the name that starts at index 0.
        /// </summary>
        private static int BufToHash(char[] buf, int len)
        {
            return BufToHash(buf, 0, len);
        }

        /// <summary>
        /// This method has to return a unique integer for each well-known
        /// lower-cased element name. The hash combines the length, the first
        /// character and up to the last four characters of the name, five bits each.
        /// </summary>
        /// <param name="buf">Buffer holding the name.</param>
        /// <param name="offset">Index of the first character of the name.</param>
        /// <param name="len">Number of characters in the name.</param>
        private static int BufToHash(char[] buf, int offset, int len)
        {
            int hash = len;
            hash <<= 5;
            hash += buf[offset] - 0x60;
            int j = len;
            for (int i = 0; i < 4 && j > 0; i++)
            {
                j--;
                hash <<= 5;
                hash += buf[offset + j] - 0x60;
            }
            return hash;
        }

        /// <summary>
        /// Creates a pre-interned element name with explicit flags.
        /// </summary>
        private ElementName([Local] string name, [Local] string camelCaseName, int flags)
        {
            this.name = name;
            this.camelCaseName = camelCaseName;
            this.flags = flags;
        }

        /// <summary>
        /// Creates a custom element name: the camel-case name equals the name and
        /// the flags are <see cref="DispatchGroup.OTHER"/> combined with <see cref="CUSTOM"/>.
        /// </summary>
        internal ElementName([Local] string name)
        {
            this.name = name;
            this.camelCaseName = name;
            this.flags = (int) DispatchGroup.OTHER | CUSTOM;
        }

        /// <summary>
        /// Returns this same instance.
        /// </summary>
        /*virtual*/ public ElementName CloneElementName()
        {
            return this;
        }

        // START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate

        /////
        ///// Returns a that represents this instance.
        /////
        /////
        ///// A that represents this instance.
192 | ///// 193 | //override public string ToString() { 194 | // return "(\"" + name + "\", \"" + camelCaseName + "\", " + decomposedFlags() + ")"; 195 | //} 196 | 197 | //private string DecomposedFlags() { 198 | // StringBuilder buf = new StringBuilder("TreeBuilderConstants."); 199 | // buf.Append(treeBuilderGroupToName()); 200 | // if ((flags & SPECIAL) != 0) { 201 | // buf.Append(" | SPECIAL"); 202 | // } 203 | // if ((flags & FOSTER_PARENTING) != 0) { 204 | // buf.Append(" | FOSTER_PARENTING"); 205 | // } 206 | // if ((flags & SCOPING) != 0) { 207 | // buf.Append(" | SCOPING"); 208 | // } 209 | // if ((flags & SCOPING_AS_MATHML) != 0) { 210 | // buf.Append(" | SCOPING_AS_MATHML"); 211 | // } 212 | // if ((flags & SCOPING_AS_SVG) != 0) { 213 | // buf.Append(" | SCOPING_AS_SVG"); 214 | // } 215 | // if ((flags & OPTIONAL_END_TAG) != 0) { 216 | // buf.Append(" | OPTIONAL_END_TAG"); 217 | // } 218 | // return buf.ToString(); 219 | //} 220 | 221 | //private string constName() { 222 | // char[] buf = new char[name.Length]; 223 | // for (int i = 0; i < name.Length; i++) { 224 | // char c = name[i]; 225 | // if (c == '-') { 226 | // buf[i] = '_'; 227 | // } else if (c >= '0' && c <= '9') { 228 | // buf[i] = c; 229 | // } else { 230 | // buf[i] = (char) (c - 0x20); 231 | // } 232 | // } 233 | // return new String(buf); 234 | //} 235 | 236 | //private int hash() { 237 | // return BufToHash(name.ToCharArray(), name.Length); 238 | //} 239 | 240 | //public int CompareTo(ElementName other) { 241 | // int thisHash = this.hash(); 242 | // int otherHash = other.hash(); 243 | // if (thisHash < otherHash) { 244 | // return -1; 245 | // } else if (thisHash == otherHash) { 246 | // return 0; 247 | // } else { 248 | // return 1; 249 | // } 250 | //} 251 | 252 | //private string TreeBuilderGroupToName() { 253 | // switch (GetGroup()) { 254 | // case TreeBuilderConstants.OTHER: 255 | // return "OTHER"; 256 | // case TreeBuilderConstants.A: 257 | // return "A"; 258 | // case 
TreeBuilderConstants.BASE: 259 | // return "BASE"; 260 | // case TreeBuilderConstants.BODY: 261 | // return "BODY"; 262 | // case TreeBuilderConstants.BR: 263 | // return "BR"; 264 | // case TreeBuilderConstants.BUTTON: 265 | // return "BUTTON"; 266 | // case TreeBuilderConstants.CAPTION: 267 | // return "CAPTION"; 268 | // case TreeBuilderConstants.COL: 269 | // return "COL"; 270 | // case TreeBuilderConstants.COLGROUP: 271 | // return "COLGROUP"; 272 | // case TreeBuilderConstants.FONT: 273 | // return "FONT"; 274 | // case TreeBuilderConstants.FORM: 275 | // return "FORM"; 276 | // case TreeBuilderConstants.FRAME: 277 | // return "FRAME"; 278 | // case TreeBuilderConstants.FRAMESET: 279 | // return "FRAMESET"; 280 | // case TreeBuilderConstants.IMAGE: 281 | // return "IMAGE"; 282 | // case TreeBuilderConstants.INPUT: 283 | // return "INPUT"; 284 | // case TreeBuilderConstants.ISINDEX: 285 | // return "ISINDEX"; 286 | // case TreeBuilderConstants.LI: 287 | // return "LI"; 288 | // case TreeBuilderConstants.LINK_OR_BASEFONT_OR_BGSOUND: 289 | // return "LINK_OR_BASEFONT_OR_BGSOUND"; 290 | // case TreeBuilderConstants.MATH: 291 | // return "MATH"; 292 | // case TreeBuilderConstants.META: 293 | // return "META"; 294 | // case TreeBuilderConstants.SVG: 295 | // return "SVG"; 296 | // case TreeBuilderConstants.HEAD: 297 | // return "HEAD"; 298 | // case TreeBuilderConstants.HR: 299 | // return "HR"; 300 | // case TreeBuilderConstants.HTML: 301 | // return "HTML"; 302 | // case TreeBuilderConstants.KEYGEN: 303 | // return "KEYGEN"; 304 | // case TreeBuilderConstants.NOBR: 305 | // return "NOBR"; 306 | // case TreeBuilderConstants.NOFRAMES: 307 | // return "NOFRAMES"; 308 | // case TreeBuilderConstants.NOSCRIPT: 309 | // return "NOSCRIPT"; 310 | // case TreeBuilderConstants.OPTGROUP: 311 | // return "OPTGROUP"; 312 | // case TreeBuilderConstants.OPTION: 313 | // return "OPTION"; 314 | // case TreeBuilderConstants.P: 315 | // return "P"; 316 | // case 
TreeBuilderConstants.PLAINTEXT: 317 | // return "PLAINTEXT"; 318 | // case TreeBuilderConstants.SCRIPT: 319 | // return "SCRIPT"; 320 | // case TreeBuilderConstants.SELECT: 321 | // return "SELECT"; 322 | // case TreeBuilderConstants.STYLE: 323 | // return "STYLE"; 324 | // case TreeBuilderConstants.TABLE: 325 | // return "TABLE"; 326 | // case TreeBuilderConstants.TEXTAREA: 327 | // return "TEXTAREA"; 328 | // case TreeBuilderConstants.TITLE: 329 | // return "TITLE"; 330 | // case TreeBuilderConstants.TR: 331 | // return "TR"; 332 | // case TreeBuilderConstants.XMP: 333 | // return "XMP"; 334 | // case TreeBuilderConstants.TBODY_OR_THEAD_OR_TFOOT: 335 | // return "TBODY_OR_THEAD_OR_TFOOT"; 336 | // case TreeBuilderConstants.TD_OR_TH: 337 | // return "TD_OR_TH"; 338 | // case TreeBuilderConstants.DD_OR_DT: 339 | // return "DD_OR_DT"; 340 | // case TreeBuilderConstants.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: 341 | // return "H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6"; 342 | // case TreeBuilderConstants.OBJECT: 343 | // return "OBJECT"; 344 | // case TreeBuilderConstants.OUTPUT_OR_LABEL: 345 | // return "OUTPUT_OR_LABEL"; 346 | // case TreeBuilderConstants.MARQUEE_OR_APPLET: 347 | // return "MARQUEE_OR_APPLET"; 348 | // case TreeBuilderConstants.PRE_OR_LISTING: 349 | // return "PRE_OR_LISTING"; 350 | // case TreeBuilderConstants.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: 351 | // return "B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U"; 352 | // case TreeBuilderConstants.UL_OR_OL_OR_DL: 353 | // return "UL_OR_OL_OR_DL"; 354 | // case TreeBuilderConstants.IFRAME: 355 | // return "IFRAME"; 356 | // case TreeBuilderConstants.NOEMBED: 357 | // return "NOEMBED"; 358 | // case TreeBuilderConstants.EMBED_OR_IMG: 359 | // return "EMBED_OR_IMG"; 360 | // case TreeBuilderConstants.AREA_OR_WBR: 361 | // return "AREA_OR_WBR"; 362 | // case TreeBuilderConstants.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: 363 | // return 
"DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU"; 364 | // case TreeBuilderConstants.FIELDSET: 365 | // return "FIELDSET"; 366 | // case TreeBuilderConstants.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY: 367 | // return "ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY"; 368 | // case TreeBuilderConstants.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR: 369 | // return "RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR"; 370 | // case TreeBuilderConstants.RT_OR_RP: 371 | // return "RT_OR_RP"; 372 | // case TreeBuilderConstants.COMMAND: 373 | // return "COMMAND"; 374 | // case TreeBuilderConstants.PARAM_OR_SOURCE_OR_TRACK: 375 | // return "PARAM_OR_SOURCE_OR_TRACK"; 376 | // case TreeBuilderConstants.MGLYPH_OR_MALIGNMARK: 377 | // return "MGLYPH_OR_MALIGNMARK"; 378 | // case TreeBuilderConstants.MI_MO_MN_MS_MTEXT: 379 | // return "MI_MO_MN_MS_MTEXT"; 380 | // case TreeBuilderConstants.ANNOTATION_XML: 381 | // return "ANNOTATION_XML"; 382 | // case TreeBuilderConstants.FOREIGNOBJECT_OR_DESC: 383 | // return "FOREIGNOBJECT_OR_DESC"; 384 | // } 385 | // return null; 386 | //} 387 | 388 | ///** 389 | // * Regenerate self 390 | // * 391 | // * @param args 392 | // */ 393 | //public static void main(String[] args) { 394 | // Arrays.sort(ELEMENT_NAMES); 395 | // for (int i = 1; i < ELEMENT_NAMES.length; i++) { 396 | // if (ELEMENT_NAMES[i].hash() == ELEMENT_NAMES[i - 1].hash()) { 397 | // System.err.println("Hash collision: " + ELEMENT_NAMES[i].name 398 | // + ", " + ELEMENT_NAMES[i - 1].name); 399 | // return; 400 | // } 401 | // } 402 | // for (int i = 0; i < ELEMENT_NAMES.length; i++) { 403 | // ElementName el = ELEMENT_NAMES[i]; 404 | // System.out.println("public static readonly ElementName " 405 | // + el.constName() + " = new ElementName" + el.toString() 406 | // + ";"); 407 | // } 408 | // System.out.println("private final static @NoLength 
ElementName[] ELEMENT_NAMES = {"); 409 | // for (int i = 0; i < ELEMENT_NAMES.length; i++) { 410 | // ElementName el = ELEMENT_NAMES[i]; 411 | // System.out.println(el.constName() + ","); 412 | // } 413 | // System.out.println("};"); 414 | // System.out.println("private final static int[] ELEMENT_HASHES = {"); 415 | // for (int i = 0; i < ELEMENT_NAMES.length; i++) { 416 | // ElementName el = ELEMENT_NAMES[i]; 417 | // System.out.println(Integer.toString(el.hash()) + ","); 418 | // } 419 | // System.out.println("};"); 420 | //} 421 | 422 | // START GENERATED CODE 423 | public static readonly ElementName A = new ElementName("a", "a", (int) DispatchGroup.A); 424 | public static readonly ElementName B = new ElementName("b", "b", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 425 | public static readonly ElementName G = new ElementName("g", "g", (int) DispatchGroup.OTHER); 426 | public static readonly ElementName I = new ElementName("i", "i", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 427 | public static readonly ElementName P = new ElementName("p", "p", (int) DispatchGroup.P | SPECIAL | OPTIONAL_END_TAG); 428 | public static readonly ElementName Q = new ElementName("q", "q", (int) DispatchGroup.OTHER); 429 | public static readonly ElementName S = new ElementName("s", "s", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 430 | public static readonly ElementName U = new ElementName("u", "u", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 431 | public static readonly ElementName BR = new ElementName("br", "br", (int) DispatchGroup.BR | SPECIAL); 432 | public static readonly ElementName CI = new ElementName("ci", "ci", (int) DispatchGroup.OTHER); 433 | public static readonly ElementName CN = new ElementName("cn", "cn", (int) DispatchGroup.OTHER); 434 | public static readonly 
ElementName DD = new ElementName("dd", "dd", (int) DispatchGroup.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG); 435 | public static readonly ElementName DL = new ElementName("dl", "dl", (int) DispatchGroup.UL_OR_OL_OR_DL | SPECIAL); 436 | public static readonly ElementName DT = new ElementName("dt", "dt", (int) DispatchGroup.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG); 437 | public static readonly ElementName EM = new ElementName("em", "em", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 438 | public static readonly ElementName EQ = new ElementName("eq", "eq", (int) DispatchGroup.OTHER); 439 | public static readonly ElementName FN = new ElementName("fn", "fn", (int) DispatchGroup.OTHER); 440 | public static readonly ElementName H1 = new ElementName("h1", "h1", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); 441 | public static readonly ElementName H2 = new ElementName("h2", "h2", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); 442 | public static readonly ElementName H3 = new ElementName("h3", "h3", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); 443 | public static readonly ElementName H4 = new ElementName("h4", "h4", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); 444 | public static readonly ElementName H5 = new ElementName("h5", "h5", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); 445 | public static readonly ElementName H6 = new ElementName("h6", "h6", (int) DispatchGroup.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); 446 | public static readonly ElementName GT = new ElementName("gt", "gt", (int) DispatchGroup.OTHER); 447 | public static readonly ElementName HR = new ElementName("hr", "hr", (int) DispatchGroup.HR | SPECIAL); 448 | public static readonly ElementName IN = new ElementName("in", "in", (int) DispatchGroup.OTHER); 449 | public static readonly ElementName LI = new ElementName("li", "li", (int) DispatchGroup.LI | SPECIAL | 
OPTIONAL_END_TAG); 450 | public static readonly ElementName LN = new ElementName("ln", "ln", (int) DispatchGroup.OTHER); 451 | public static readonly ElementName LT = new ElementName("lt", "lt", (int) DispatchGroup.OTHER); 452 | public static readonly ElementName MI = new ElementName("mi", "mi", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); 453 | public static readonly ElementName MN = new ElementName("mn", "mn", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); 454 | public static readonly ElementName MO = new ElementName("mo", "mo", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); 455 | public static readonly ElementName MS = new ElementName("ms", "ms", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); 456 | public static readonly ElementName OL = new ElementName("ol", "ol", (int) DispatchGroup.UL_OR_OL_OR_DL | SPECIAL); 457 | public static readonly ElementName OR = new ElementName("or", "or", (int) DispatchGroup.OTHER); 458 | public static readonly ElementName PI = new ElementName("pi", "pi", (int) DispatchGroup.OTHER); 459 | public static readonly ElementName RP = new ElementName("rp", "rp", (int) DispatchGroup.RT_OR_RP | OPTIONAL_END_TAG); 460 | public static readonly ElementName RT = new ElementName("rt", "rt", (int) DispatchGroup.RT_OR_RP | OPTIONAL_END_TAG); 461 | public static readonly ElementName TD = new ElementName("td", "td", (int) DispatchGroup.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG); 462 | public static readonly ElementName TH = new ElementName("th", "th", (int) DispatchGroup.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG); 463 | public static readonly ElementName TR = new ElementName("tr", "tr", (int) DispatchGroup.TR | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); 464 | public static readonly ElementName TT = new ElementName("tt", "tt", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 465 | public static readonly ElementName UL = new 
ElementName("ul", "ul", (int) DispatchGroup.UL_OR_OL_OR_DL | SPECIAL); 466 | public static readonly ElementName AND = new ElementName("and", "and", (int) DispatchGroup.OTHER); 467 | public static readonly ElementName ARG = new ElementName("arg", "arg", (int) DispatchGroup.OTHER); 468 | public static readonly ElementName ABS = new ElementName("abs", "abs", (int) DispatchGroup.OTHER); 469 | public static readonly ElementName BIG = new ElementName("big", "big", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 470 | public static readonly ElementName BDO = new ElementName("bdo", "bdo", (int) DispatchGroup.OTHER); 471 | public static readonly ElementName CSC = new ElementName("csc", "csc", (int) DispatchGroup.OTHER); 472 | public static readonly ElementName COL = new ElementName("col", "col", (int) DispatchGroup.COL | SPECIAL); 473 | public static readonly ElementName COS = new ElementName("cos", "cos", (int) DispatchGroup.OTHER); 474 | public static readonly ElementName COT = new ElementName("cot", "cot", (int) DispatchGroup.OTHER); 475 | public static readonly ElementName DEL = new ElementName("del", "del", (int) DispatchGroup.OTHER); 476 | public static readonly ElementName DFN = new ElementName("dfn", "dfn", (int) DispatchGroup.OTHER); 477 | public static readonly ElementName DIR = new ElementName("dir", "dir", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 478 | public static readonly ElementName DIV = new ElementName("div", "div", (int) DispatchGroup.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); 479 | public static readonly ElementName EXP = new ElementName("exp", "exp", (int) DispatchGroup.OTHER); 480 | public static readonly ElementName GCD = new ElementName("gcd", "gcd", (int) DispatchGroup.OTHER); 481 | public static readonly ElementName GEQ = new ElementName("geq", "geq", (int) DispatchGroup.OTHER); 
482 | public static readonly ElementName IMG = new ElementName("img", "img", (int) DispatchGroup.EMBED_OR_IMG | SPECIAL); 483 | public static readonly ElementName INS = new ElementName("ins", "ins", (int) DispatchGroup.OTHER); 484 | public static readonly ElementName INT = new ElementName("int", "int", (int) DispatchGroup.OTHER); 485 | public static readonly ElementName KBD = new ElementName("kbd", "kbd", (int) DispatchGroup.OTHER); 486 | public static readonly ElementName LOG = new ElementName("log", "log", (int) DispatchGroup.OTHER); 487 | public static readonly ElementName LCM = new ElementName("lcm", "lcm", (int) DispatchGroup.OTHER); 488 | public static readonly ElementName LEQ = new ElementName("leq", "leq", (int) DispatchGroup.OTHER); 489 | public static readonly ElementName MTD = new ElementName("mtd", "mtd", (int) DispatchGroup.OTHER); 490 | public static readonly ElementName MIN = new ElementName("min", "min", (int) DispatchGroup.OTHER); 491 | public static readonly ElementName MAP = new ElementName("map", "map", (int) DispatchGroup.OTHER); 492 | public static readonly ElementName MTR = new ElementName("mtr", "mtr", (int) DispatchGroup.OTHER); 493 | public static readonly ElementName MAX = new ElementName("max", "max", (int) DispatchGroup.OTHER); 494 | public static readonly ElementName NEQ = new ElementName("neq", "neq", (int) DispatchGroup.OTHER); 495 | public static readonly ElementName NOT = new ElementName("not", "not", (int) DispatchGroup.OTHER); 496 | public static readonly ElementName NAV = new ElementName("nav", "nav", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 497 | public static readonly ElementName PRE = new ElementName("pre", "pre", (int) DispatchGroup.PRE_OR_LISTING | SPECIAL); 498 | public static readonly ElementName REM = new ElementName("rem", "rem", (int) DispatchGroup.OTHER); 499 | public static readonly ElementName SUB 
= new ElementName("sub", "sub", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); 500 | public static readonly ElementName SEC = new ElementName("sec", "sec", (int) DispatchGroup.OTHER); 501 | public static readonly ElementName SVG = new ElementName("svg", "svg", (int) DispatchGroup.SVG); 502 | public static readonly ElementName SUM = new ElementName("sum", "sum", (int) DispatchGroup.OTHER); 503 | public static readonly ElementName SIN = new ElementName("sin", "sin", (int) DispatchGroup.OTHER); 504 | public static readonly ElementName SEP = new ElementName("sep", "sep", (int) DispatchGroup.OTHER); 505 | public static readonly ElementName SUP = new ElementName("sup", "sup", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); 506 | public static readonly ElementName SET = new ElementName("set", "set", (int) DispatchGroup.OTHER); 507 | public static readonly ElementName TAN = new ElementName("tan", "tan", (int) DispatchGroup.OTHER); 508 | public static readonly ElementName USE = new ElementName("use", "use", (int) DispatchGroup.OTHER); 509 | public static readonly ElementName VAR = new ElementName("var", "var", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); 510 | /* The (int) cast applies to the DispatchGroup member only; the resulting int is then OR-ed with the flag constant(s) that follow. */ public static readonly ElementName WBR = new ElementName("wbr", "wbr", (int) DispatchGroup.AREA_OR_WBR | SPECIAL); 511 | public static readonly ElementName XMP = new ElementName("xmp", "xmp", (int) DispatchGroup.XMP); 512 | public static readonly ElementName XOR = new ElementName("xor", "xor", (int) DispatchGroup.OTHER); 513 | /* Four-character element names start here; this part of the table is grouped by increasing name length. */ public static readonly ElementName AREA = new ElementName("area", "area", (int) DispatchGroup.AREA_OR_WBR | SPECIAL); 514 | public static readonly ElementName ABBR = new ElementName("abbr", "abbr", (int) DispatchGroup.OTHER); 515 | public static readonly ElementName BASE = new ElementName("base", "base", (int) DispatchGroup.BASE | SPECIAL); 516 | public static readonly ElementName BVAR = new ElementName("bvar", "bvar", (int) DispatchGroup.OTHER); 517 | public static
readonly ElementName BODY = new ElementName("body", "body", (int) DispatchGroup.BODY | SPECIAL | OPTIONAL_END_TAG); 518 | public static readonly ElementName CARD = new ElementName("card", "card", (int) DispatchGroup.OTHER); 519 | public static readonly ElementName CODE = new ElementName("code", "code", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 520 | public static readonly ElementName CITE = new ElementName("cite", "cite", (int) DispatchGroup.OTHER); 521 | public static readonly ElementName CSCH = new ElementName("csch", "csch", (int) DispatchGroup.OTHER); 522 | public static readonly ElementName COSH = new ElementName("cosh", "cosh", (int) DispatchGroup.OTHER); 523 | public static readonly ElementName COTH = new ElementName("coth", "coth", (int) DispatchGroup.OTHER); 524 | public static readonly ElementName CURL = new ElementName("curl", "curl", (int) DispatchGroup.OTHER); 525 | public static readonly ElementName DESC = new ElementName("desc", "desc", (int) DispatchGroup.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG); 526 | public static readonly ElementName DIFF = new ElementName("diff", "diff", (int) DispatchGroup.OTHER); 527 | public static readonly ElementName DEFS = new ElementName("defs", "defs", (int) DispatchGroup.OTHER); 528 | public static readonly ElementName FORM = new ElementName("form", "form", (int) DispatchGroup.FORM | SPECIAL); 529 | public static readonly ElementName FONT = new ElementName("font", "font", (int) DispatchGroup.FONT); 530 | public static readonly ElementName GRAD = new ElementName("grad", "grad", (int) DispatchGroup.OTHER); 531 | public static readonly ElementName HEAD = new ElementName("head", "head", (int) DispatchGroup.HEAD | SPECIAL | OPTIONAL_END_TAG); 532 | public static readonly ElementName HTML = new ElementName("html", "html", (int) DispatchGroup.HTML | SPECIAL | SCOPING | OPTIONAL_END_TAG); 533 | public static readonly ElementName LINE = new ElementName("line", "line", (int)
DispatchGroup.OTHER); 534 | public static readonly ElementName LINK = new ElementName("link", "link", (int) DispatchGroup.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); 535 | public static readonly ElementName LIST = new ElementName("list", "list", (int) DispatchGroup.OTHER); 536 | public static readonly ElementName META = new ElementName("meta", "meta", (int) DispatchGroup.META | SPECIAL); 537 | public static readonly ElementName MSUB = new ElementName("msub", "msub", (int) DispatchGroup.OTHER); 538 | public static readonly ElementName MODE = new ElementName("mode", "mode", (int) DispatchGroup.OTHER); 539 | public static readonly ElementName MATH = new ElementName("math", "math", (int) DispatchGroup.MATH); 540 | public static readonly ElementName MARK = new ElementName("mark", "mark", (int) DispatchGroup.OTHER); 541 | public static readonly ElementName MASK = new ElementName("mask", "mask", (int) DispatchGroup.OTHER); 542 | public static readonly ElementName MEAN = new ElementName("mean", "mean", (int) DispatchGroup.OTHER); 543 | public static readonly ElementName MSUP = new ElementName("msup", "msup", (int) DispatchGroup.OTHER); 544 | public static readonly ElementName MENU = new ElementName("menu", "menu", (int) DispatchGroup.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); 545 | public static readonly ElementName MROW = new ElementName("mrow", "mrow", (int) DispatchGroup.OTHER); 546 | public static readonly ElementName NONE = new ElementName("none", "none", (int) DispatchGroup.OTHER); 547 | public static readonly ElementName NOBR = new ElementName("nobr", "nobr", (int) DispatchGroup.NOBR); 548 | public static readonly ElementName NEST = new ElementName("nest", "nest", (int) DispatchGroup.OTHER); 549 | public static readonly ElementName PATH = new ElementName("path", "path", (int) DispatchGroup.OTHER); 550 | public static readonly ElementName PLUS = new ElementName("plus", "plus", (int) DispatchGroup.OTHER); 551 | public static readonly ElementName RULE = new
ElementName("rule", "rule", (int) DispatchGroup.OTHER); 552 | public static readonly ElementName REAL = new ElementName("real", "real", (int) DispatchGroup.OTHER); 553 | public static readonly ElementName RELN = new ElementName("reln", "reln", (int) DispatchGroup.OTHER); 554 | public static readonly ElementName RECT = new ElementName("rect", "rect", (int) DispatchGroup.OTHER); 555 | public static readonly ElementName ROOT = new ElementName("root", "root", (int) DispatchGroup.OTHER); 556 | public static readonly ElementName RUBY = new ElementName("ruby", "ruby", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); 557 | public static readonly ElementName SECH = new ElementName("sech", "sech", (int) DispatchGroup.OTHER); 558 | public static readonly ElementName SINH = new ElementName("sinh", "sinh", (int) DispatchGroup.OTHER); 559 | public static readonly ElementName SPAN = new ElementName("span", "span", (int) DispatchGroup.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); 560 | public static readonly ElementName SAMP = new ElementName("samp", "samp", (int) DispatchGroup.OTHER); 561 | public static readonly ElementName STOP = new ElementName("stop", "stop", (int) DispatchGroup.OTHER); 562 | public static readonly ElementName SDEV = new ElementName("sdev", "sdev", (int) DispatchGroup.OTHER); 563 | public static readonly ElementName TIME = new ElementName("time", "time", (int) DispatchGroup.OTHER); 564 | public static readonly ElementName TRUE = new ElementName("true", "true", (int) DispatchGroup.OTHER); 565 | public static readonly ElementName TREF = new ElementName("tref", "tref", (int) DispatchGroup.OTHER); 566 | public static readonly ElementName TANH = new ElementName("tanh", "tanh", (int) DispatchGroup.OTHER); 567 | public static readonly ElementName TEXT = new ElementName("text", "text", (int) DispatchGroup.OTHER); 568 | public static readonly ElementName VIEW = new ElementName("view", "view", (int) DispatchGroup.OTHER); 569 | /* Five-character element names start here (entries in this part of the table are grouped by increasing name length). */ public static readonly ElementName ASIDE =
new ElementName("aside", "aside", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 570 | public static readonly ElementName AUDIO = new ElementName("audio", "audio", (int) DispatchGroup.OTHER); 571 | public static readonly ElementName APPLY = new ElementName("apply", "apply", (int) DispatchGroup.OTHER); 572 | public static readonly ElementName EMBED = new ElementName("embed", "embed", (int) DispatchGroup.EMBED_OR_IMG | SPECIAL); 573 | public static readonly ElementName FRAME = new ElementName("frame", "frame", (int) DispatchGroup.FRAME | SPECIAL); 574 | public static readonly ElementName FALSE = new ElementName("false", "false", (int) DispatchGroup.OTHER); 575 | public static readonly ElementName FLOOR = new ElementName("floor", "floor", (int) DispatchGroup.OTHER); 576 | public static readonly ElementName GLYPH = new ElementName("glyph", "glyph", (int) DispatchGroup.OTHER); 577 | public static readonly ElementName HKERN = new ElementName("hkern", "hkern", (int) DispatchGroup.OTHER); 578 | public static readonly ElementName IMAGE = new ElementName("image", "image", (int) DispatchGroup.IMAGE | SPECIAL); 579 | public static readonly ElementName IDENT = new ElementName("ident", "ident", (int) DispatchGroup.OTHER); 580 | public static readonly ElementName INPUT = new ElementName("input", "input", (int) DispatchGroup.INPUT | SPECIAL); 581 | public static readonly ElementName LABEL = new ElementName("label", "label", (int) DispatchGroup.OUTPUT_OR_LABEL); 582 | public static readonly ElementName LIMIT = new ElementName("limit", "limit", (int) DispatchGroup.OTHER); 583 | public static readonly ElementName MFRAC = new ElementName("mfrac", "mfrac", (int) DispatchGroup.OTHER); 584 | public static readonly ElementName MPATH = new ElementName("mpath", "mpath", (int) DispatchGroup.OTHER); 585 | public static readonly ElementName METER = new ElementName("meter", "meter",
(int) DispatchGroup.OTHER); 586 | public static readonly ElementName MOVER = new ElementName("mover", "mover", (int) DispatchGroup.OTHER); 587 | public static readonly ElementName MINUS = new ElementName("minus", "minus", (int) DispatchGroup.OTHER); 588 | public static readonly ElementName MROOT = new ElementName("mroot", "mroot", (int) DispatchGroup.OTHER); 589 | public static readonly ElementName MSQRT = new ElementName("msqrt", "msqrt", (int) DispatchGroup.OTHER); 590 | public static readonly ElementName MTEXT = new ElementName("mtext", "mtext", (int) DispatchGroup.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); 591 | public static readonly ElementName NOTIN = new ElementName("notin", "notin", (int) DispatchGroup.OTHER); 592 | public static readonly ElementName PIECE = new ElementName("piece", "piece", (int) DispatchGroup.OTHER); 593 | public static readonly ElementName PARAM = new ElementName("param", "param", (int) DispatchGroup.PARAM_OR_SOURCE_OR_TRACK | SPECIAL); 594 | public static readonly ElementName POWER = new ElementName("power", "power", (int) DispatchGroup.OTHER); 595 | public static readonly ElementName REALS = new ElementName("reals", "reals", (int) DispatchGroup.OTHER); 596 | public static readonly ElementName STYLE = new ElementName("style", "style", (int) DispatchGroup.STYLE | SPECIAL); 597 | public static readonly ElementName SMALL = new ElementName("small", "small", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 598 | public static readonly ElementName THEAD = new ElementName("thead", "thead", (int) DispatchGroup.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); 599 | public static readonly ElementName TABLE = new ElementName("table", "table", (int) DispatchGroup.TABLE | SPECIAL | FOSTER_PARENTING | SCOPING); 600 | public static readonly ElementName TITLE = new ElementName("title", "title", (int) DispatchGroup.TITLE | SPECIAL | SCOPING_AS_SVG); 601 | public static readonly
ElementName TRACK = new ElementName("track", "track", (int) DispatchGroup.PARAM_OR_SOURCE_OR_TRACK); 602 | public static readonly ElementName TSPAN = new ElementName("tspan", "tspan", (int) DispatchGroup.OTHER); 603 | public static readonly ElementName TIMES = new ElementName("times", "times", (int) DispatchGroup.OTHER); 604 | public static readonly ElementName TFOOT = new ElementName("tfoot", "tfoot", (int) DispatchGroup.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); 605 | public static readonly ElementName TBODY = new ElementName("tbody", "tbody", (int) DispatchGroup.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); 606 | public static readonly ElementName UNION = new ElementName("union", "union", (int) DispatchGroup.OTHER); 607 | public static readonly ElementName VKERN = new ElementName("vkern", "vkern", (int) DispatchGroup.OTHER); 608 | public static readonly ElementName VIDEO = new ElementName("video", "video", (int) DispatchGroup.OTHER); 609 | /* Six-character element names start here (entries in this part of the table are grouped by increasing name length). */ public static readonly ElementName ARCSEC = new ElementName("arcsec", "arcsec", (int) DispatchGroup.OTHER); 610 | public static readonly ElementName ARCCSC = new ElementName("arccsc", "arccsc", (int) DispatchGroup.OTHER); 611 | public static readonly ElementName ARCTAN = new ElementName("arctan", "arctan", (int) DispatchGroup.OTHER); 612 | public static readonly ElementName ARCSIN = new ElementName("arcsin", "arcsin", (int) DispatchGroup.OTHER); 613 | public static readonly ElementName ARCCOS = new ElementName("arccos", "arccos", (int) DispatchGroup.OTHER); 614 | public static readonly ElementName APPLET = new ElementName("applet", "applet", (int) DispatchGroup.MARQUEE_OR_APPLET | SPECIAL | SCOPING); 615 | public static readonly ElementName ARCCOT = new ElementName("arccot", "arccot", (int) DispatchGroup.OTHER); 616 | public static readonly ElementName APPROX = new ElementName("approx", "approx", (int) DispatchGroup.OTHER); 617 | public static readonly ElementName
BUTTON = new ElementName("button", "button", (int) DispatchGroup.BUTTON | SPECIAL); 618 | public static readonly ElementName CIRCLE = new ElementName("circle", "circle", (int) DispatchGroup.OTHER); 619 | public static readonly ElementName CENTER = new ElementName("center", "center", (int) DispatchGroup.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); 620 | public static readonly ElementName CURSOR = new ElementName("cursor", "cursor", (int) DispatchGroup.OTHER); 621 | public static readonly ElementName CANVAS = new ElementName("canvas", "canvas", (int) DispatchGroup.OTHER); 622 | public static readonly ElementName DIVIDE = new ElementName("divide", "divide", (int) DispatchGroup.OTHER); 623 | public static readonly ElementName DEGREE = new ElementName("degree", "degree", (int) DispatchGroup.OTHER); 624 | public static readonly ElementName DOMAIN = new ElementName("domain", "domain", (int) DispatchGroup.OTHER); 625 | public static readonly ElementName EXISTS = new ElementName("exists", "exists", (int) DispatchGroup.OTHER); 626 | /* In this stretch of the table this is the first entry whose two string arguments differ: lowercase "fetile" versus camel-case "feTile". NOTE(review): presumably the second argument is the case-preserved SVG spelling - confirm against the ElementName constructor. */ public static readonly ElementName FETILE = new ElementName("fetile", "feTile", (int) DispatchGroup.OTHER); 627 | public static readonly ElementName FIGURE = new ElementName("figure", "figure", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 628 | public static readonly ElementName FORALL = new ElementName("forall", "forall", (int) DispatchGroup.OTHER); 629 | public static readonly ElementName FILTER = new ElementName("filter", "filter", (int) DispatchGroup.OTHER); 630 | public static readonly ElementName FOOTER = new ElementName("footer", "footer", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 631 | public static readonly ElementName HGROUP = new ElementName("hgroup", "hgroup", (int)
DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 632 | public static readonly ElementName HEADER = new ElementName("header", "header", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 633 | public static readonly ElementName IFRAME = new ElementName("iframe", "iframe", (int) DispatchGroup.IFRAME | SPECIAL); 634 | public static readonly ElementName KEYGEN = new ElementName("keygen", "keygen", (int) DispatchGroup.KEYGEN | SPECIAL); 635 | public static readonly ElementName LAMBDA = new ElementName("lambda", "lambda", (int) DispatchGroup.OTHER); 636 | public static readonly ElementName LEGEND = new ElementName("legend", "legend", (int) DispatchGroup.OTHER); 637 | public static readonly ElementName MSPACE = new ElementName("mspace", "mspace", (int) DispatchGroup.OTHER); 638 | public static readonly ElementName MTABLE = new ElementName("mtable", "mtable", (int) DispatchGroup.OTHER); 639 | public static readonly ElementName MSTYLE = new ElementName("mstyle", "mstyle", (int) DispatchGroup.OTHER); 640 | public static readonly ElementName MGLYPH = new ElementName("mglyph", "mglyph", (int) DispatchGroup.MGLYPH_OR_MALIGNMARK); 641 | public static readonly ElementName MEDIAN = new ElementName("median", "median", (int) DispatchGroup.OTHER); 642 | public static readonly ElementName MUNDER = new ElementName("munder", "munder", (int) DispatchGroup.OTHER); 643 | public static readonly ElementName MARKER = new ElementName("marker", "marker", (int) DispatchGroup.OTHER); 644 | public static readonly ElementName MERROR = new ElementName("merror", "merror", (int) DispatchGroup.OTHER); 645 | public static readonly ElementName MOMENT = new ElementName("moment", "moment", (int) DispatchGroup.OTHER); 646 | public static readonly ElementName MATRIX = new ElementName("matrix",
"matrix", (int) DispatchGroup.OTHER); 647 | public static readonly ElementName OPTION = new ElementName("option", "option", (int) DispatchGroup.OPTION | OPTIONAL_END_TAG); 648 | public static readonly ElementName OBJECT = new ElementName("object", "object", (int) DispatchGroup.OBJECT | SPECIAL | SCOPING); 649 | public static readonly ElementName OUTPUT = new ElementName("output", "output", (int) DispatchGroup.OUTPUT_OR_LABEL); 650 | public static readonly ElementName PRIMES = new ElementName("primes", "primes", (int) DispatchGroup.OTHER); 651 | public static readonly ElementName SOURCE = new ElementName("source", "source", (int) DispatchGroup.PARAM_OR_SOURCE_OR_TRACK); 652 | public static readonly ElementName STRIKE = new ElementName("strike", "strike", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 653 | public static readonly ElementName STRONG = new ElementName("strong", "strong", (int) DispatchGroup.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); 654 | public static readonly ElementName SWITCH = new ElementName("switch", "switch", (int) DispatchGroup.OTHER); 655 | public static readonly ElementName SYMBOL = new ElementName("symbol", "symbol", (int) DispatchGroup.OTHER); 656 | public static readonly ElementName SELECT = new ElementName("select", "select", (int) DispatchGroup.SELECT | SPECIAL); 657 | public static readonly ElementName SUBSET = new ElementName("subset", "subset", (int) DispatchGroup.OTHER); 658 | public static readonly ElementName SCRIPT = new ElementName("script", "script", (int) DispatchGroup.SCRIPT | SPECIAL); 659 | public static readonly ElementName TBREAK = new ElementName("tbreak", "tbreak", (int) DispatchGroup.OTHER); 660 | public static readonly ElementName VECTOR = new ElementName("vector", "vector", (int) DispatchGroup.OTHER); 661 | /* Seven-character element names start here. */ public static readonly ElementName ARTICLE = new ElementName("article", "article", (int)
DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 662 | public static readonly ElementName ANIMATE = new ElementName("animate", "animate", (int) DispatchGroup.OTHER); 663 | public static readonly ElementName ARCSECH = new ElementName("arcsech", "arcsech", (int) DispatchGroup.OTHER); 664 | public static readonly ElementName ARCCSCH = new ElementName("arccsch", "arccsch", (int) DispatchGroup.OTHER); 665 | public static readonly ElementName ARCTANH = new ElementName("arctanh", "arctanh", (int) DispatchGroup.OTHER); 666 | public static readonly ElementName ARCSINH = new ElementName("arcsinh", "arcsinh", (int) DispatchGroup.OTHER); 667 | public static readonly ElementName ARCCOSH = new ElementName("arccosh", "arccosh", (int) DispatchGroup.OTHER); 668 | public static readonly ElementName ARCCOTH = new ElementName("arccoth", "arccoth", (int) DispatchGroup.OTHER); 669 | public static readonly ElementName ACRONYM = new ElementName("acronym", "acronym", (int) DispatchGroup.OTHER); 670 | public static readonly ElementName ADDRESS = new ElementName("address", "address", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 671 | public static readonly ElementName BGSOUND = new ElementName("bgsound", "bgsound", (int) DispatchGroup.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); 672 | public static readonly ElementName COMMAND = new ElementName("command", "command", (int) DispatchGroup.COMMAND | SPECIAL); 673 | public static readonly ElementName COMPOSE = new ElementName("compose", "compose", (int) DispatchGroup.OTHER); 674 | public static readonly ElementName CEILING = new ElementName("ceiling", "ceiling", (int) DispatchGroup.OTHER); 675 | public static readonly ElementName CSYMBOL = new ElementName("csymbol", "csymbol", (int) DispatchGroup.OTHER); 676 | public static readonly
ElementName CAPTION = new ElementName("caption", "caption", (int) DispatchGroup.CAPTION | SPECIAL | SCOPING); 677 | public static readonly ElementName DISCARD = new ElementName("discard", "discard", (int) DispatchGroup.OTHER); 678 | public static readonly ElementName DECLARE = new ElementName("declare", "declare", (int) DispatchGroup.OTHER); 679 | public static readonly ElementName DETAILS = new ElementName("details", "details", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 680 | public static readonly ElementName ELLIPSE = new ElementName("ellipse", "ellipse", (int) DispatchGroup.OTHER); 681 | /* The FE* entries that follow pass a lowercase first string and a camel-case second string (for example "fefunca" and "feFuncA"); every other entry in this run passes the same string twice. */ public static readonly ElementName FEFUNCA = new ElementName("fefunca", "feFuncA", (int) DispatchGroup.OTHER); 682 | public static readonly ElementName FEFUNCB = new ElementName("fefuncb", "feFuncB", (int) DispatchGroup.OTHER); 683 | public static readonly ElementName FEBLEND = new ElementName("feblend", "feBlend", (int) DispatchGroup.OTHER); 684 | public static readonly ElementName FEFLOOD = new ElementName("feflood", "feFlood", (int) DispatchGroup.OTHER); 685 | public static readonly ElementName FEIMAGE = new ElementName("feimage", "feImage", (int) DispatchGroup.OTHER); 686 | public static readonly ElementName FEMERGE = new ElementName("femerge", "feMerge", (int) DispatchGroup.OTHER); 687 | public static readonly ElementName FEFUNCG = new ElementName("fefuncg", "feFuncG", (int) DispatchGroup.OTHER); 688 | public static readonly ElementName FEFUNCR = new ElementName("fefuncr", "feFuncR", (int) DispatchGroup.OTHER); 689 | public static readonly ElementName HANDLER = new ElementName("handler", "handler", (int) DispatchGroup.OTHER); 690 | public static readonly ElementName INVERSE = new ElementName("inverse", "inverse", (int) DispatchGroup.OTHER); 691 | public static readonly ElementName IMPLIES = new ElementName("implies", "implies", (int) DispatchGroup.OTHER);
692 | public static readonly ElementName ISINDEX = new ElementName("isindex", "isindex", (int) DispatchGroup.ISINDEX | SPECIAL); 693 | public static readonly ElementName LOGBASE = new ElementName("logbase", "logbase", (int) DispatchGroup.OTHER); 694 | public static readonly ElementName LISTING = new ElementName("listing", "listing", (int) DispatchGroup.PRE_OR_LISTING | SPECIAL); 695 | public static readonly ElementName MFENCED = new ElementName("mfenced", "mfenced", (int) DispatchGroup.OTHER); 696 | public static readonly ElementName MPADDED = new ElementName("mpadded", "mpadded", (int) DispatchGroup.OTHER); 697 | public static readonly ElementName MARQUEE = new ElementName("marquee", "marquee", (int) DispatchGroup.MARQUEE_OR_APPLET | SPECIAL | SCOPING); 698 | public static readonly ElementName MACTION = new ElementName("maction", "maction", (int) DispatchGroup.OTHER); 699 | public static readonly ElementName MSUBSUP = new ElementName("msubsup", "msubsup", (int) DispatchGroup.OTHER); 700 | public static readonly ElementName NOEMBED = new ElementName("noembed", "noembed", (int) DispatchGroup.NOEMBED | SPECIAL); 701 | public static readonly ElementName POLYGON = new ElementName("polygon", "polygon", (int) DispatchGroup.OTHER); 702 | public static readonly ElementName PATTERN = new ElementName("pattern", "pattern", (int) DispatchGroup.OTHER); 703 | public static readonly ElementName PRODUCT = new ElementName("product", "product", (int) DispatchGroup.OTHER); 704 | public static readonly ElementName SETDIFF = new ElementName("setdiff", "setdiff", (int) DispatchGroup.OTHER); 705 | public static readonly ElementName SECTION = new ElementName("section", "section", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 706 | public static readonly ElementName SUMMARY = new ElementName("summary", "summary", (int)
DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 707 | public static readonly ElementName TENDSTO = new ElementName("tendsto", "tendsto", (int) DispatchGroup.OTHER); 708 | public static readonly ElementName UPLIMIT = new ElementName("uplimit", "uplimit", (int) DispatchGroup.OTHER); 709 | /* Eight-character element names start here. A few entries in this group, such as this one, pass a camel-case second string ("altGlyph") next to the lowercase first string. */ public static readonly ElementName ALTGLYPH = new ElementName("altglyph", "altGlyph", (int) DispatchGroup.OTHER); 710 | public static readonly ElementName BASEFONT = new ElementName("basefont", "basefont", (int) DispatchGroup.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); 711 | public static readonly ElementName CLIPPATH = new ElementName("clippath", "clipPath", (int) DispatchGroup.OTHER); 712 | public static readonly ElementName CODOMAIN = new ElementName("codomain", "codomain", (int) DispatchGroup.OTHER); 713 | public static readonly ElementName COLGROUP = new ElementName("colgroup", "colgroup", (int) DispatchGroup.COLGROUP | SPECIAL | OPTIONAL_END_TAG); 714 | public static readonly ElementName EMPTYSET = new ElementName("emptyset", "emptyset", (int) DispatchGroup.OTHER); 715 | public static readonly ElementName FACTOROF = new ElementName("factorof", "factorof", (int) DispatchGroup.OTHER); 716 | public static readonly ElementName FIELDSET = new ElementName("fieldset", "fieldset", (int) DispatchGroup.FIELDSET | SPECIAL); 717 | public static readonly ElementName FRAMESET = new ElementName("frameset", "frameset", (int) DispatchGroup.FRAMESET | SPECIAL); 718 | public static readonly ElementName FEOFFSET = new ElementName("feoffset", "feOffset", (int) DispatchGroup.OTHER); 719 | public static readonly ElementName GLYPHREF = new ElementName("glyphref", "glyphRef", (int) DispatchGroup.OTHER); 720 | public static readonly ElementName INTERVAL = new ElementName("interval", "interval", (int) DispatchGroup.OTHER); 721 | public static readonly ElementName INTEGERS = new ElementName("integers",
"integers", (int) DispatchGroup.OTHER); 722 | public static readonly ElementName INFINITY = new ElementName("infinity", "infinity", (int) DispatchGroup.OTHER); 723 | public static readonly ElementName LISTENER = new ElementName("listener", "listener", (int) DispatchGroup.OTHER); 724 | public static readonly ElementName LOWLIMIT = new ElementName("lowlimit", "lowlimit", (int) DispatchGroup.OTHER); 725 | public static readonly ElementName METADATA = new ElementName("metadata", "metadata", (int) DispatchGroup.OTHER); 726 | public static readonly ElementName MENCLOSE = new ElementName("menclose", "menclose", (int) DispatchGroup.OTHER); 727 | public static readonly ElementName MPHANTOM = new ElementName("mphantom", "mphantom", (int) DispatchGroup.OTHER); 728 | public static readonly ElementName NOFRAMES = new ElementName("noframes", "noframes", (int) DispatchGroup.NOFRAMES | SPECIAL); 729 | public static readonly ElementName NOSCRIPT = new ElementName("noscript", "noscript", (int) DispatchGroup.NOSCRIPT | SPECIAL); 730 | public static readonly ElementName OPTGROUP = new ElementName("optgroup", "optgroup", (int) DispatchGroup.OPTGROUP | SPECIAL | OPTIONAL_END_TAG); 731 | public static readonly ElementName POLYLINE = new ElementName("polyline", "polyline", (int) DispatchGroup.OTHER); 732 | public static readonly ElementName PREFETCH = new ElementName("prefetch", "prefetch", (int) DispatchGroup.OTHER); 733 | public static readonly ElementName PROGRESS = new ElementName("progress", "progress", (int) DispatchGroup.OTHER); 734 | public static readonly ElementName PRSUBSET = new ElementName("prsubset", "prsubset", (int) DispatchGroup.OTHER); 735 | public static readonly ElementName QUOTIENT = new ElementName("quotient", "quotient", (int) DispatchGroup.OTHER); 736 | public static readonly ElementName SELECTOR = new ElementName("selector", "selector", (int) DispatchGroup.OTHER); 737 | public static readonly ElementName TEXTAREA = new ElementName("textarea", "textarea", (int)
DispatchGroup.TEXTAREA | SPECIAL); 738 | public static readonly ElementName TEXTPATH = new ElementName("textpath", "textPath", (int) DispatchGroup.OTHER); 739 | public static readonly ElementName VARIANCE = new ElementName("variance", "variance", (int) DispatchGroup.OTHER); 740 | /* Nine-character element names start here (entries in this part of the table are grouped by increasing name length). */ public static readonly ElementName ANIMATION = new ElementName("animation", "animation", (int) DispatchGroup.OTHER); 741 | public static readonly ElementName CONJUGATE = new ElementName("conjugate", "conjugate", (int) DispatchGroup.OTHER); 742 | public static readonly ElementName CONDITION = new ElementName("condition", "condition", (int) DispatchGroup.OTHER); 743 | public static readonly ElementName COMPLEXES = new ElementName("complexes", "complexes", (int) DispatchGroup.OTHER); 744 | public static readonly ElementName FONT_FACE = new ElementName("font-face", "font-face", (int) DispatchGroup.OTHER); 745 | public static readonly ElementName FACTORIAL = new ElementName("factorial", "factorial", (int) DispatchGroup.OTHER); 746 | public static readonly ElementName INTERSECT = new ElementName("intersect", "intersect", (int) DispatchGroup.OTHER); 747 | public static readonly ElementName IMAGINARY = new ElementName("imaginary", "imaginary", (int) DispatchGroup.OTHER); 748 | public static readonly ElementName LAPLACIAN = new ElementName("laplacian", "laplacian", (int) DispatchGroup.OTHER); 749 | public static readonly ElementName MATRIXROW = new ElementName("matrixrow", "matrixrow", (int) DispatchGroup.OTHER); 750 | public static readonly ElementName NOTSUBSET = new ElementName("notsubset", "notsubset", (int) DispatchGroup.OTHER); 751 | public static readonly ElementName OTHERWISE = new ElementName("otherwise", "otherwise", (int) DispatchGroup.OTHER); 752 | public static readonly ElementName PIECEWISE = new ElementName("piecewise", "piecewise", (int) DispatchGroup.OTHER); 753 | public static readonly ElementName PLAINTEXT = new ElementName("plaintext", "plaintext", (int) DispatchGroup.PLAINTEXT
| SPECIAL); 754 | public static readonly ElementName RATIONALS = new ElementName("rationals", "rationals", (int) DispatchGroup.OTHER); 755 | public static readonly ElementName SEMANTICS = new ElementName("semantics", "semantics", (int) DispatchGroup.OTHER); 756 | public static readonly ElementName TRANSPOSE = new ElementName("transpose", "transpose", (int) DispatchGroup.OTHER); 757 | /* Ten-character element names start here (entries in this part of the table are grouped by increasing name length). */ public static readonly ElementName ANNOTATION = new ElementName("annotation", "annotation", (int) DispatchGroup.OTHER); 758 | public static readonly ElementName BLOCKQUOTE = new ElementName("blockquote", "blockquote", (int) DispatchGroup.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); 759 | public static readonly ElementName DIVERGENCE = new ElementName("divergence", "divergence", (int) DispatchGroup.OTHER); 760 | public static readonly ElementName EULERGAMMA = new ElementName("eulergamma", "eulergamma", (int) DispatchGroup.OTHER); 761 | public static readonly ElementName EQUIVALENT = new ElementName("equivalent", "equivalent", (int) DispatchGroup.OTHER); 762 | public static readonly ElementName FIGCAPTION = new ElementName("figcaption", "figcaption", (int) DispatchGroup.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); 763 | public static readonly ElementName IMAGINARYI = new ElementName("imaginaryi", "imaginaryi", (int) DispatchGroup.OTHER); 764 | public static readonly ElementName MALIGNMARK = new ElementName("malignmark", "malignmark", (int) DispatchGroup.MGLYPH_OR_MALIGNMARK); 765 | public static readonly ElementName MUNDEROVER = new ElementName("munderover", "munderover", (int) DispatchGroup.OTHER); 766 | public static readonly ElementName MLABELEDTR = new ElementName("mlabeledtr", "mlabeledtr", (int) DispatchGroup.OTHER); 767 | public static readonly ElementName NOTANUMBER = new ElementName("notanumber", "notanumber", (int) DispatchGroup.OTHER); 768 | public static readonly ElementName
SOLIDCOLOR = new ElementName("solidcolor", "solidcolor", (int) DispatchGroup.OTHER); 769 | /* Eleven-character element names start here (entries in this part of the table are grouped by increasing name length). */ public static readonly ElementName ALTGLYPHDEF = new ElementName("altglyphdef", "altGlyphDef", (int) DispatchGroup.OTHER); 770 | public static readonly ElementName DETERMINANT = new ElementName("determinant", "determinant", (int) DispatchGroup.OTHER); 771 | public static readonly ElementName FEMERGENODE = new ElementName("femergenode", "feMergeNode", (int) DispatchGroup.OTHER); 772 | public static readonly ElementName FECOMPOSITE = new ElementName("fecomposite", "feComposite", (int) DispatchGroup.OTHER); 773 | public static readonly ElementName FESPOTLIGHT = new ElementName("fespotlight", "feSpotLight", (int) DispatchGroup.OTHER); 774 | public static readonly ElementName MALIGNGROUP = new ElementName("maligngroup", "maligngroup", (int) DispatchGroup.OTHER); 775 | public static readonly ElementName MPRESCRIPTS = new ElementName("mprescripts", "mprescripts", (int) DispatchGroup.OTHER); 776 | public static readonly ElementName MOMENTABOUT = new ElementName("momentabout", "momentabout", (int) DispatchGroup.OTHER); 777 | public static readonly ElementName NOTPRSUBSET = new ElementName("notprsubset", "notprsubset", (int) DispatchGroup.OTHER); 778 | public static readonly ElementName PARTIALDIFF = new ElementName("partialdiff", "partialdiff", (int) DispatchGroup.OTHER); 779 | /* Twelve-character element names start here. */ public static readonly ElementName ALTGLYPHITEM = new ElementName("altglyphitem", "altGlyphItem", (int) DispatchGroup.OTHER); 780 | public static readonly ElementName ANIMATECOLOR = new ElementName("animatecolor", "animateColor", (int) DispatchGroup.OTHER); 781 | public static readonly ElementName DATATEMPLATE = new ElementName("datatemplate", "datatemplate", (int) DispatchGroup.OTHER); 782 | public static readonly ElementName EXPONENTIALE = new ElementName("exponentiale", "exponentiale", (int) DispatchGroup.OTHER); 783 | public static readonly ElementName FETURBULENCE = new ElementName("feturbulence",
"feTurbulence", (int) DispatchGroup.OTHER); 784 | public static readonly ElementName FEPOINTLIGHT = new ElementName("fepointlight", "fePointLight", (int) DispatchGroup.OTHER); 785 | public static readonly ElementName FEMORPHOLOGY = new ElementName("femorphology", "feMorphology", (int) DispatchGroup.OTHER); 786 | public static readonly ElementName OUTERPRODUCT = new ElementName("outerproduct", "outerproduct", (int) DispatchGroup.OTHER); 787 | /* Thirteen-character element names start here. */ public static readonly ElementName ANIMATEMOTION = new ElementName("animatemotion", "animateMotion", (int) DispatchGroup.OTHER); 788 | public static readonly ElementName COLOR_PROFILE = new ElementName("color-profile", "color-profile", (int) DispatchGroup.OTHER); 789 | public static readonly ElementName FONT_FACE_SRC = new ElementName("font-face-src", "font-face-src", (int) DispatchGroup.OTHER); 790 | public static readonly ElementName FONT_FACE_URI = new ElementName("font-face-uri", "font-face-uri", (int) DispatchGroup.OTHER); 791 | public static readonly ElementName FOREIGNOBJECT = new ElementName("foreignobject", "foreignObject", (int) DispatchGroup.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG); 792 | public static readonly ElementName FECOLORMATRIX = new ElementName("fecolormatrix", "feColorMatrix", (int) DispatchGroup.OTHER); 793 | public static readonly ElementName MISSING_GLYPH = new ElementName("missing-glyph", "missing-glyph", (int) DispatchGroup.OTHER); 794 | public static readonly ElementName MMULTISCRIPTS = new ElementName("mmultiscripts", "mmultiscripts", (int) DispatchGroup.OTHER); 795 | public static readonly ElementName SCALARPRODUCT = new ElementName("scalarproduct", "scalarproduct", (int) DispatchGroup.OTHER); 796 | public static readonly ElementName VECTORPRODUCT = new ElementName("vectorproduct", "vectorproduct", (int) DispatchGroup.OTHER); 797 | /* Fourteen-character element names start here. */ public static readonly ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", (int) DispatchGroup.ANNOTATION_XML | SCOPING_AS_MATHML); 798 |
public static readonly ElementName DEFINITION_SRC = new ElementName("definition-src", "definition-src", (int) DispatchGroup.OTHER); 799 | public static readonly ElementName FONT_FACE_NAME = new ElementName("font-face-name", "font-face-name", (int) DispatchGroup.OTHER); 800 | public static readonly ElementName FEGAUSSIANBLUR = new ElementName("fegaussianblur", "feGaussianBlur", (int) DispatchGroup.OTHER); 801 | public static readonly ElementName FEDISTANTLIGHT = new ElementName("fedistantlight", "feDistantLight", (int) DispatchGroup.OTHER); 802 | public static readonly ElementName LINEARGRADIENT = new ElementName("lineargradient", "linearGradient", (int) DispatchGroup.OTHER); 803 | public static readonly ElementName NATURALNUMBERS = new ElementName("naturalnumbers", "naturalnumbers", (int) DispatchGroup.OTHER); 804 | public static readonly ElementName RADIALGRADIENT = new ElementName("radialgradient", "radialGradient", (int) DispatchGroup.OTHER); 805 | public static readonly ElementName ANIMATETRANSFORM = new ElementName("animatetransform", "animateTransform", (int) DispatchGroup.OTHER); 806 | public static readonly ElementName CARTESIANPRODUCT = new ElementName("cartesianproduct", "cartesianproduct", (int) DispatchGroup.OTHER); 807 | public static readonly ElementName FONT_FACE_FORMAT = new ElementName("font-face-format", "font-face-format", (int) DispatchGroup.OTHER); 808 | public static readonly ElementName FECONVOLVEMATRIX = new ElementName("feconvolvematrix", "feConvolveMatrix", (int) DispatchGroup.OTHER); 809 | public static readonly ElementName FEDIFFUSELIGHTING = new ElementName("fediffuselighting", "feDiffuseLighting", (int) DispatchGroup.OTHER); 810 | public static readonly ElementName FEDISPLACEMENTMAP = new ElementName("fedisplacementmap", "feDisplacementMap", (int) DispatchGroup.OTHER); 811 | public static readonly ElementName FESPECULARLIGHTING = new ElementName("fespecularlighting", "feSpecularLighting", (int) DispatchGroup.OTHER); 812 | public static 
readonly ElementName DOMAINOFAPPLICATION = new ElementName("domainofapplication", "domainofapplication", (int) DispatchGroup.OTHER); 813 | public static readonly ElementName FECOMPONENTTRANSFER = new ElementName("fecomponenttransfer", "feComponentTransfer", (int) DispatchGroup.OTHER); 814 | private static readonly ElementName[] ELEMENT_NAMES = { 815 | A, 816 | B, 817 | G, 818 | I, 819 | P, 820 | Q, 821 | S, 822 | U, 823 | BR, 824 | CI, 825 | CN, 826 | DD, 827 | DL, 828 | DT, 829 | EM, 830 | EQ, 831 | FN, 832 | H1, 833 | H2, 834 | H3, 835 | H4, 836 | H5, 837 | H6, 838 | GT, 839 | HR, 840 | IN, 841 | LI, 842 | LN, 843 | LT, 844 | MI, 845 | MN, 846 | MO, 847 | MS, 848 | OL, 849 | OR, 850 | PI, 851 | RP, 852 | RT, 853 | TD, 854 | TH, 855 | TR, 856 | TT, 857 | UL, 858 | AND, 859 | ARG, 860 | ABS, 861 | BIG, 862 | BDO, 863 | CSC, 864 | COL, 865 | COS, 866 | COT, 867 | DEL, 868 | DFN, 869 | DIR, 870 | DIV, 871 | EXP, 872 | GCD, 873 | GEQ, 874 | IMG, 875 | INS, 876 | INT, 877 | KBD, 878 | LOG, 879 | LCM, 880 | LEQ, 881 | MTD, 882 | MIN, 883 | MAP, 884 | MTR, 885 | MAX, 886 | NEQ, 887 | NOT, 888 | NAV, 889 | PRE, 890 | REM, 891 | SUB, 892 | SEC, 893 | SVG, 894 | SUM, 895 | SIN, 896 | SEP, 897 | SUP, 898 | SET, 899 | TAN, 900 | USE, 901 | VAR, 902 | WBR, 903 | XMP, 904 | XOR, 905 | AREA, 906 | ABBR, 907 | BASE, 908 | BVAR, 909 | BODY, 910 | CARD, 911 | CODE, 912 | CITE, 913 | CSCH, 914 | COSH, 915 | COTH, 916 | CURL, 917 | DESC, 918 | DIFF, 919 | DEFS, 920 | FORM, 921 | FONT, 922 | GRAD, 923 | HEAD, 924 | HTML, 925 | LINE, 926 | LINK, 927 | LIST, 928 | META, 929 | MSUB, 930 | MODE, 931 | MATH, 932 | MARK, 933 | MASK, 934 | MEAN, 935 | MSUP, 936 | MENU, 937 | MROW, 938 | NONE, 939 | NOBR, 940 | NEST, 941 | PATH, 942 | PLUS, 943 | RULE, 944 | REAL, 945 | RELN, 946 | RECT, 947 | ROOT, 948 | RUBY, 949 | SECH, 950 | SINH, 951 | SPAN, 952 | SAMP, 953 | STOP, 954 | SDEV, 955 | TIME, 956 | TRUE, 957 | TREF, 958 | TANH, 959 | TEXT, 960 | VIEW, 961 | ASIDE, 962 | AUDIO, 963 | APPLY, 
964 | EMBED, 965 | FRAME, 966 | FALSE, 967 | FLOOR, 968 | GLYPH, 969 | HKERN, 970 | IMAGE, 971 | IDENT, 972 | INPUT, 973 | LABEL, 974 | LIMIT, 975 | MFRAC, 976 | MPATH, 977 | METER, 978 | MOVER, 979 | MINUS, 980 | MROOT, 981 | MSQRT, 982 | MTEXT, 983 | NOTIN, 984 | PIECE, 985 | PARAM, 986 | POWER, 987 | REALS, 988 | STYLE, 989 | SMALL, 990 | THEAD, 991 | TABLE, 992 | TITLE, 993 | TRACK, 994 | TSPAN, 995 | TIMES, 996 | TFOOT, 997 | TBODY, 998 | UNION, 999 | VKERN, 1000 | VIDEO, 1001 | ARCSEC, 1002 | ARCCSC, 1003 | ARCTAN, 1004 | ARCSIN, 1005 | ARCCOS, 1006 | APPLET, 1007 | ARCCOT, 1008 | APPROX, 1009 | BUTTON, 1010 | CIRCLE, 1011 | CENTER, 1012 | CURSOR, 1013 | CANVAS, 1014 | DIVIDE, 1015 | DEGREE, 1016 | DOMAIN, 1017 | EXISTS, 1018 | FETILE, 1019 | FIGURE, 1020 | FORALL, 1021 | FILTER, 1022 | FOOTER, 1023 | HGROUP, 1024 | HEADER, 1025 | IFRAME, 1026 | KEYGEN, 1027 | LAMBDA, 1028 | LEGEND, 1029 | MSPACE, 1030 | MTABLE, 1031 | MSTYLE, 1032 | MGLYPH, 1033 | MEDIAN, 1034 | MUNDER, 1035 | MARKER, 1036 | MERROR, 1037 | MOMENT, 1038 | MATRIX, 1039 | OPTION, 1040 | OBJECT, 1041 | OUTPUT, 1042 | PRIMES, 1043 | SOURCE, 1044 | STRIKE, 1045 | STRONG, 1046 | SWITCH, 1047 | SYMBOL, 1048 | SELECT, 1049 | SUBSET, 1050 | SCRIPT, 1051 | TBREAK, 1052 | VECTOR, 1053 | ARTICLE, 1054 | ANIMATE, 1055 | ARCSECH, 1056 | ARCCSCH, 1057 | ARCTANH, 1058 | ARCSINH, 1059 | ARCCOSH, 1060 | ARCCOTH, 1061 | ACRONYM, 1062 | ADDRESS, 1063 | BGSOUND, 1064 | COMMAND, 1065 | COMPOSE, 1066 | CEILING, 1067 | CSYMBOL, 1068 | CAPTION, 1069 | DISCARD, 1070 | DECLARE, 1071 | DETAILS, 1072 | ELLIPSE, 1073 | FEFUNCA, 1074 | FEFUNCB, 1075 | FEBLEND, 1076 | FEFLOOD, 1077 | FEIMAGE, 1078 | FEMERGE, 1079 | FEFUNCG, 1080 | FEFUNCR, 1081 | HANDLER, 1082 | INVERSE, 1083 | IMPLIES, 1084 | ISINDEX, 1085 | LOGBASE, 1086 | LISTING, 1087 | MFENCED, 1088 | MPADDED, 1089 | MARQUEE, 1090 | MACTION, 1091 | MSUBSUP, 1092 | NOEMBED, 1093 | POLYGON, 1094 | PATTERN, 1095 | PRODUCT, 1096 | SETDIFF, 1097 | SECTION, 1098 | SUMMARY, 
1099 | TENDSTO, 1100 | UPLIMIT, 1101 | ALTGLYPH, 1102 | BASEFONT, 1103 | CLIPPATH, 1104 | CODOMAIN, 1105 | COLGROUP, 1106 | EMPTYSET, 1107 | FACTOROF, 1108 | FIELDSET, 1109 | FRAMESET, 1110 | FEOFFSET, 1111 | GLYPHREF, 1112 | INTERVAL, 1113 | INTEGERS, 1114 | INFINITY, 1115 | LISTENER, 1116 | LOWLIMIT, 1117 | METADATA, 1118 | MENCLOSE, 1119 | MPHANTOM, 1120 | NOFRAMES, 1121 | NOSCRIPT, 1122 | OPTGROUP, 1123 | POLYLINE, 1124 | PREFETCH, 1125 | PROGRESS, 1126 | PRSUBSET, 1127 | QUOTIENT, 1128 | SELECTOR, 1129 | TEXTAREA, 1130 | TEXTPATH, 1131 | VARIANCE, 1132 | ANIMATION, 1133 | CONJUGATE, 1134 | CONDITION, 1135 | COMPLEXES, 1136 | FONT_FACE, 1137 | FACTORIAL, 1138 | INTERSECT, 1139 | IMAGINARY, 1140 | LAPLACIAN, 1141 | MATRIXROW, 1142 | NOTSUBSET, 1143 | OTHERWISE, 1144 | PIECEWISE, 1145 | PLAINTEXT, 1146 | RATIONALS, 1147 | SEMANTICS, 1148 | TRANSPOSE, 1149 | ANNOTATION, 1150 | BLOCKQUOTE, 1151 | DIVERGENCE, 1152 | EULERGAMMA, 1153 | EQUIVALENT, 1154 | FIGCAPTION, 1155 | IMAGINARYI, 1156 | MALIGNMARK, 1157 | MUNDEROVER, 1158 | MLABELEDTR, 1159 | NOTANUMBER, 1160 | SOLIDCOLOR, 1161 | ALTGLYPHDEF, 1162 | DETERMINANT, 1163 | FEMERGENODE, 1164 | FECOMPOSITE, 1165 | FESPOTLIGHT, 1166 | MALIGNGROUP, 1167 | MPRESCRIPTS, 1168 | MOMENTABOUT, 1169 | NOTPRSUBSET, 1170 | PARTIALDIFF, 1171 | ALTGLYPHITEM, 1172 | ANIMATECOLOR, 1173 | DATATEMPLATE, 1174 | EXPONENTIALE, 1175 | FETURBULENCE, 1176 | FEPOINTLIGHT, 1177 | FEMORPHOLOGY, 1178 | OUTERPRODUCT, 1179 | ANIMATEMOTION, 1180 | COLOR_PROFILE, 1181 | FONT_FACE_SRC, 1182 | FONT_FACE_URI, 1183 | FOREIGNOBJECT, 1184 | FECOLORMATRIX, 1185 | MISSING_GLYPH, 1186 | MMULTISCRIPTS, 1187 | SCALARPRODUCT, 1188 | VECTORPRODUCT, 1189 | ANNOTATION_XML, 1190 | DEFINITION_SRC, 1191 | FONT_FACE_NAME, 1192 | FEGAUSSIANBLUR, 1193 | FEDISTANTLIGHT, 1194 | LINEARGRADIENT, 1195 | NATURALNUMBERS, 1196 | RADIALGRADIENT, 1197 | ANIMATETRANSFORM, 1198 | CARTESIANPRODUCT, 1199 | FONT_FACE_FORMAT, 1200 | FECONVOLVEMATRIX, 1201 | FEDIFFUSELIGHTING, 1202 | 
FEDISPLACEMENTMAP, 1203 | FESPECULARLIGHTING, 1204 | DOMAINOFAPPLICATION, 1205 | FECOMPONENTTRANSFER, 1206 | }; 1207 | private static readonly int[] ELEMENT_HASHES = { 1208 | 1057, 1209 | 1090, 1210 | 1255, 1211 | 1321, 1212 | 1552, 1213 | 1585, 1214 | 1651, 1215 | 1717, 1216 | 68162, 1217 | 68899, 1218 | 69059, 1219 | 69764, 1220 | 70020, 1221 | 70276, 1222 | 71077, 1223 | 71205, 1224 | 72134, 1225 | 72232, 1226 | 72264, 1227 | 72296, 1228 | 72328, 1229 | 72360, 1230 | 72392, 1231 | 73351, 1232 | 74312, 1233 | 75209, 1234 | 78124, 1235 | 78284, 1236 | 78476, 1237 | 79149, 1238 | 79309, 1239 | 79341, 1240 | 79469, 1241 | 81295, 1242 | 81487, 1243 | 82224, 1244 | 84498, 1245 | 84626, 1246 | 86164, 1247 | 86292, 1248 | 86612, 1249 | 86676, 1250 | 87445, 1251 | 3183041, 1252 | 3186241, 1253 | 3198017, 1254 | 3218722, 1255 | 3226754, 1256 | 3247715, 1257 | 3256803, 1258 | 3263971, 1259 | 3264995, 1260 | 3289252, 1261 | 3291332, 1262 | 3295524, 1263 | 3299620, 1264 | 3326725, 1265 | 3379303, 1266 | 3392679, 1267 | 3448233, 1268 | 3460553, 1269 | 3461577, 1270 | 3510347, 1271 | 3546604, 1272 | 3552364, 1273 | 3556524, 1274 | 3576461, 1275 | 3586349, 1276 | 3588141, 1277 | 3590797, 1278 | 3596333, 1279 | 3622062, 1280 | 3625454, 1281 | 3627054, 1282 | 3675728, 1283 | 3749042, 1284 | 3771059, 1285 | 3771571, 1286 | 3776211, 1287 | 3782323, 1288 | 3782963, 1289 | 3784883, 1290 | 3785395, 1291 | 3788979, 1292 | 3815476, 1293 | 3839605, 1294 | 3885110, 1295 | 3917911, 1296 | 3948984, 1297 | 3951096, 1298 | 135304769, 1299 | 135858241, 1300 | 136498210, 1301 | 136906434, 1302 | 137138658, 1303 | 137512995, 1304 | 137531875, 1305 | 137548067, 1306 | 137629283, 1307 | 137645539, 1308 | 137646563, 1309 | 137775779, 1310 | 138529956, 1311 | 138615076, 1312 | 139040932, 1313 | 140954086, 1314 | 141179366, 1315 | 141690439, 1316 | 142738600, 1317 | 143013512, 1318 | 146979116, 1319 | 147175724, 1320 | 147475756, 1321 | 147902637, 1322 | 147936877, 1323 | 148017645, 1324 | 148131885, 
1325 | 148228141, 1326 | 148229165, 1327 | 148309165, 1328 | 148395629, 1329 | 148551853, 1330 | 148618829, 1331 | 149076462, 1332 | 149490158, 1333 | 149572782, 1334 | 151277616, 1335 | 151639440, 1336 | 153268914, 1337 | 153486514, 1338 | 153563314, 1339 | 153750706, 1340 | 153763314, 1341 | 153914034, 1342 | 154406067, 1343 | 154417459, 1344 | 154600979, 1345 | 154678323, 1346 | 154680979, 1347 | 154866835, 1348 | 155366708, 1349 | 155375188, 1350 | 155391572, 1351 | 155465780, 1352 | 155869364, 1353 | 158045494, 1354 | 168988979, 1355 | 169321621, 1356 | 169652752, 1357 | 173151309, 1358 | 174240818, 1359 | 174247297, 1360 | 174669292, 1361 | 175391532, 1362 | 176638123, 1363 | 177380397, 1364 | 177879204, 1365 | 177886734, 1366 | 180753473, 1367 | 181020073, 1368 | 181503558, 1369 | 181686320, 1370 | 181999237, 1371 | 181999311, 1372 | 182048201, 1373 | 182074866, 1374 | 182078003, 1375 | 182083764, 1376 | 182920847, 1377 | 184716457, 1378 | 184976961, 1379 | 185145071, 1380 | 187281445, 1381 | 187872052, 1382 | 188100653, 1383 | 188875944, 1384 | 188919873, 1385 | 188920457, 1386 | 189107250, 1387 | 189203987, 1388 | 189371817, 1389 | 189414886, 1390 | 189567458, 1391 | 190266670, 1392 | 191318187, 1393 | 191337609, 1394 | 202479203, 1395 | 202493027, 1396 | 202835587, 1397 | 202843747, 1398 | 203013219, 1399 | 203036048, 1400 | 203045987, 1401 | 203177552, 1402 | 203898516, 1403 | 204648562, 1404 | 205067918, 1405 | 205078130, 1406 | 205096654, 1407 | 205689142, 1408 | 205690439, 1409 | 205988909, 1410 | 207213161, 1411 | 207794484, 1412 | 207800999, 1413 | 208023602, 1414 | 208213644, 1415 | 208213647, 1416 | 210261490, 1417 | 210310273, 1418 | 210940978, 1419 | 213325049, 1420 | 213946445, 1421 | 214055079, 1422 | 215125040, 1423 | 215134273, 1424 | 215135028, 1425 | 215237420, 1426 | 215418148, 1427 | 215553166, 1428 | 215553394, 1429 | 215563858, 1430 | 215627949, 1431 | 215754324, 1432 | 217529652, 1433 | 217713834, 1434 | 217732628, 1435 | 218731945, 
1436 | 221417045, 1437 | 221424946, 1438 | 221493746, 1439 | 221515401, 1440 | 221658189, 1441 | 221908140, 1442 | 221910626, 1443 | 221921586, 1444 | 222659762, 1445 | 225001091, 1446 | 236105833, 1447 | 236113965, 1448 | 236194995, 1449 | 236195427, 1450 | 236206132, 1451 | 236206387, 1452 | 236211683, 1453 | 236212707, 1454 | 236381647, 1455 | 236571826, 1456 | 237124271, 1457 | 238172205, 1458 | 238210544, 1459 | 238270764, 1460 | 238435405, 1461 | 238501172, 1462 | 239224867, 1463 | 239257644, 1464 | 239710497, 1465 | 240307721, 1466 | 241208789, 1467 | 241241557, 1468 | 241318060, 1469 | 241319404, 1470 | 241343533, 1471 | 241344069, 1472 | 241405397, 1473 | 241765845, 1474 | 243864964, 1475 | 244502085, 1476 | 244946220, 1477 | 245109902, 1478 | 247647266, 1479 | 247707956, 1480 | 248648814, 1481 | 248648836, 1482 | 248682161, 1483 | 248986932, 1484 | 249058914, 1485 | 249697357, 1486 | 252132601, 1487 | 252135604, 1488 | 252317348, 1489 | 255007012, 1490 | 255278388, 1491 | 255641645, 1492 | 256365156, 1493 | 257566121, 1494 | 269763372, 1495 | 271202790, 1496 | 271863856, 1497 | 272049197, 1498 | 272127474, 1499 | 274339449, 1500 | 274939471, 1501 | 275388004, 1502 | 275388005, 1503 | 275388006, 1504 | 275977800, 1505 | 278267602, 1506 | 278513831, 1507 | 278712622, 1508 | 281613765, 1509 | 281683369, 1510 | 282120228, 1511 | 282250732, 1512 | 282508942, 1513 | 283743649, 1514 | 283787570, 1515 | 284710386, 1516 | 285391148, 1517 | 285478533, 1518 | 285854898, 1519 | 285873762, 1520 | 286931113, 1521 | 288964227, 1522 | 289445441, 1523 | 289689648, 1524 | 291671489, 1525 | 303512884, 1526 | 305319975, 1527 | 305610036, 1528 | 305764101, 1529 | 308448294, 1530 | 308675890, 1531 | 312085683, 1532 | 312264750, 1533 | 315032867, 1534 | 316391000, 1535 | 317331042, 1536 | 317902135, 1537 | 318950711, 1538 | 319447220, 1539 | 321499182, 1540 | 322538804, 1541 | 323145200, 1542 | 337067316, 1543 | 337826293, 1544 | 339905989, 1545 | 340833697, 1546 | 341457068, 
1547 | 342310196, 1548 | 345302593, 1549 | 349554733, 1550 | 349771471, 1551 | 349786245, 1552 | 350819405, 1553 | 356072847, 1554 | 370349192, 1555 | 373962798, 1556 | 375558638, 1557 | 375574835, 1558 | 376053993, 1559 | 383276530, 1560 | 383373833, 1561 | 383407586, 1562 | 384439906, 1563 | 386079012, 1564 | 404133513, 1565 | 404307343, 1566 | 407031852, 1567 | 408072233, 1568 | 409112005, 1569 | 409608425, 1570 | 409771500, 1571 | 419040932, 1572 | 437730612, 1573 | 439529766, 1574 | 442616365, 1575 | 442813037, 1576 | 443157674, 1577 | 443295316, 1578 | 450118444, 1579 | 450482697, 1580 | 456789668, 1581 | 459935396, 1582 | 471217869, 1583 | 474073645, 1584 | 476230702, 1585 | 476665218, 1586 | 476717289, 1587 | 483014825, 1588 | 485083298, 1589 | 489306281, 1590 | 538364390, 1591 | 540675748, 1592 | 543819186, 1593 | 543958612, 1594 | 576960820, 1595 | 577242548, 1596 | 610515252, 1597 | 642202932, 1598 | 644420819, 1599 | }; 1600 | } 1601 | 1602 | } 1603 | -------------------------------------------------------------------------------- /HtmlParserSharp/Core/HtmlAttributes.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2008-2011 Mozilla Foundation 4 | * Copyright (c) 2012 Patrick Reisert 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | * DEALINGS IN THE SOFTWARE. 23 | */ 24 |

using System;
using System.Diagnostics;
using HtmlParserSharp.Common;

namespace HtmlParserSharp.Core
{
    /// <summary>
    /// Growable list of attribute name/value pairs, plus a separate list of
    /// xmlns declarations, interpreted under a namespace <c>mode</c>.
    /// Be careful with this class. QName is the name as it came from HTML tokenization.
    /// Otherwise, please refer to the interface doc.
    /// </summary>
    public sealed class HtmlAttributes : IEquatable<HtmlAttributes> /* : Sax.IAttributes*/
    {
        // [NOCPP[

        private static readonly AttributeName[] EMPTY_ATTRIBUTENAMES = new AttributeName[0];

        private static readonly string[] EMPTY_stringS = new string[0];

        // ]NOCPP]

        public static readonly HtmlAttributes EMPTY_ATTRIBUTES = new HtmlAttributes(AttributeName.HTML);

        // Namespace mode handed to the AttributeName accessors (GetLocal, GetUri, ...).
        private int mode;

        // Number of used slots in names/values.
        private int length;

        private AttributeName[] names;

        private string[] values;

        // [NOCPP[

        // Value of the most recently added AttributeName.ID attribute, if any.
        private string idValue;

        // Number of used slots in xmlnsNames/xmlnsValues.
        private int xmlnsLength;

        private AttributeName[] xmlnsNames;

        private string[] xmlnsValues;

        // ]NOCPP]

        /// <summary>
        /// Creates an empty attribute holder.
        /// </summary>
        /// <param name="mode">The namespace mode used when resolving names.</param>
        public HtmlAttributes(int mode)
        {
            this.mode = mode;
            this.length = 0;
            /*
             * The length of 5 covers 98.3% of elements
             * according to Hixie
             */
            this.names = new AttributeName[5];
            this.values = new string[5];

            // [NOCPP[

            this.idValue = null;

            this.xmlnsLength = 0;

            this.xmlnsNames = HtmlAttributes.EMPTY_ATTRIBUTENAMES;

            this.xmlnsValues = HtmlAttributes.EMPTY_stringS;

            // ]NOCPP]
        }
        /*
        public HtmlAttributes(HtmlAttributes other) {
            this.mode = other.mode;
            this.length = other.length;
            this.names = new AttributeName[other.length];
            this.values = new string[other.length];
            // [NOCPP[
            this.idValue = other.idValue;
            this.xmlnsLength = other.xmlnsLength;
            this.xmlnsNames = new AttributeName[other.xmlnsLength];
            this.xmlnsValues = new string[other.xmlnsLength];
            // ]NOCPP]
        }
        */

        // True when index addresses a used slot of the regular attribute list.
        private bool IsInRange(int index)
        {
            return index >= 0 && index < length;
        }

        // True when index addresses a used slot of the xmlns list.
        private bool IsInXmlnsRange(int index)
        {
            return index >= 0 && index < xmlnsLength;
        }

        /// <summary>
        /// Only use with a static argument (the lookup compares with <c>==</c>).
        /// </summary>
        /// <returns>The index of <paramref name="name"/>, or -1 if absent.</returns>
        public int GetIndex(AttributeName name)
        {
            for (int i = 0; i < length; i++)
            {
                if (names[i] == name)
                {
                    return i;
                }
            }
            return -1;
        }

        // [NOCPP[

        /// <summary>
        /// Finds an attribute by its qualified name in the current mode.
        /// </summary>
        /// <returns>The index of the first match, or -1 if absent.</returns>
        public int GetIndex(string qName)
        {
            for (int i = 0; i < length; i++)
            {
                if (names[i].GetQName(mode) == qName)
                {
                    return i;
                }
            }
            return -1;
        }

        /// <summary>
        /// Finds an attribute by namespace URI and local name in the current mode.
        /// </summary>
        /// <returns>The index of the first match, or -1 if absent.</returns>
        public int GetIndex(string uri, string localName)
        {
            for (int i = 0; i < length; i++)
            {
                AttributeName candidate = names[i];
                if (candidate.GetLocal(mode) == localName
                    && candidate.GetUri(mode) == uri)
                {
                    return i;
                }
            }
            return -1;
        }

        /// <summary>
        /// Returns the type ("ID" or "CDATA") of the attribute with the given
        /// qualified name, or null if there is no such attribute.
        /// </summary>
        public string GetType(string qName)
        {
            int index = GetIndex(qName);
            return index == -1 ? null : GetType(index);
        }

        /// <summary>
        /// Returns the type ("ID" or "CDATA") of the attribute with the given
        /// namespace URI and local name, or null if there is no such attribute.
        /// </summary>
        public string GetType(string uri, string localName)
        {
            int index = GetIndex(uri, localName);
            return index == -1 ? null : GetType(index);
        }

        /// <summary>
        /// Returns the value of the attribute with the given qualified name,
        /// or null if there is no such attribute.
        /// </summary>
        public string GetValue(string qName)
        {
            int index = GetIndex(qName);
            return index == -1 ? null : GetValue(index);
        }

        /// <summary>
        /// Returns the value of the attribute with the given namespace URI and
        /// local name, or null if there is no such attribute.
        /// </summary>
        public string GetValue(string uri, string localName)
        {
            int index = GetIndex(uri, localName);
            return index == -1 ? null : GetValue(index);
        }

        // ]NOCPP]

        /// <summary>
        /// Gets the number of regular (non xmlns-list) attributes.
        /// </summary>
        public int Length
        {
            get
            {
                return length;
            }
        }

        /// <summary>
        /// Returns the local name at <paramref name="index"/>, or null when out of range.
        /// </summary>
        [Local]
        public string GetLocalName(int index)
        {
            return IsInRange(index) ? names[index].GetLocal(mode) : null;
        }

        // [NOCPP[

        /// <summary>
        /// Returns the qualified name at <paramref name="index"/>, or null when out of range.
        /// </summary>
        public string GetQName(int index)
        {
            return IsInRange(index) ? names[index].GetQName(mode) : null;
        }

        /// <summary>
        /// Returns "ID" for the <c>AttributeName.ID</c> attribute and "CDATA" for
        /// every other attribute; null when <paramref name="index"/> is out of range.
        /// </summary>
        public string GetType(int index)
        {
            if (!IsInRange(index))
            {
                return null;
            }
            return (names[index] == AttributeName.ID) ? "ID" : "CDATA";
        }

        // ]NOCPP]

        /// <summary>
        /// Returns the attribute name object at <paramref name="index"/>, or null when out of range.
        /// </summary>
        public AttributeName GetAttributeName(int index)
        {
            return IsInRange(index) ? names[index] : null;
        }

        /// <summary>
        /// Returns the namespace URI at <paramref name="index"/>, or null when out of range.
        /// </summary>
        [NsUri]
        public string GetURI(int index)
        {
            return IsInRange(index) ? names[index].GetUri(mode) : null;
        }

        /// <summary>
        /// Returns the prefix at <paramref name="index"/>, or null when out of range.
        /// </summary>
        [Prefix]
        public string GetPrefix(int index)
        {
            return IsInRange(index) ? names[index].GetPrefix(mode) : null;
        }

        /// <summary>
        /// Returns the value at <paramref name="index"/>, or null when out of range.
        /// </summary>
        public string GetValue(int index)
        {
            return IsInRange(index) ? values[index] : null;
        }

        /// <summary>
        /// Only use with static argument.
        /// </summary>
        /// <returns>The value of <paramref name="name"/>, or null if absent.</returns>
        public string GetValue(AttributeName name)
        {
            int index = GetIndex(name);
            return index == -1 ? null : GetValue(index);
        }

        // [NOCPP[

        /// <summary>
        /// Gets the value recorded for the <c>AttributeName.ID</c> attribute, or null.
        /// </summary>
        public string Id
        {
            get
            {
                return idValue;
            }
        }

        /// <summary>
        /// Gets the number of entries in the xmlns list.
        /// </summary>
        public int XmlnsLength
        {
            get
            {
                return xmlnsLength;
            }
        }

        /// <summary>
        /// Returns the local name of the xmlns entry at <paramref name="index"/>, or null when out of range.
        /// </summary>
        [Local]
        public string GetXmlnsLocalName(int index)
        {
            return IsInXmlnsRange(index) ? xmlnsNames[index].GetLocal(mode) : null;
        }

        /// <summary>
        /// Returns the namespace URI of the xmlns entry at <paramref name="index"/>, or null when out of range.
        /// </summary>
        [NsUri]
        public string GetXmlnsURI(int index)
        {
            return IsInXmlnsRange(index) ? xmlnsNames[index].GetUri(mode) : null;
        }

        /// <summary>
        /// Returns the value of the xmlns entry at <paramref name="index"/>, or null when out of range.
        /// </summary>
        public string GetXmlnsValue(int index)
        {
            return IsInXmlnsRange(index) ? xmlnsValues[index] : null;
        }

        /// <summary>
        /// Finds <paramref name="name"/> in the xmlns list (compared with <c>==</c>).
        /// </summary>
        /// <returns>The index of the first match, or -1 if absent.</returns>
        public int GetXmlnsIndex(AttributeName name)
        {
            for (int i = 0; i < xmlnsLength; i++)
            {
                if (xmlnsNames[i] == name)
                {
                    return i;
                }
            }
            return -1;
        }

        /// <summary>
        /// Returns the value of the xmlns entry named <paramref name="name"/>, or null if absent.
        /// </summary>
        public string GetXmlnsValue(AttributeName name)
        {
            int index = GetXmlnsIndex(name);
            return index == -1 ? null : GetXmlnsValue(index);
        }

        /// <summary>
        /// Returns the name object of the xmlns entry at <paramref name="index"/>, or null when out of range.
        /// </summary>
        public AttributeName GetXmlnsAttributeName(int index)
        {
            return IsInXmlnsRange(index) ? xmlnsNames[index] : null;
        }

        // ]NOCPP]

        /// <summary>
        /// Appends an attribute. Names flagged <c>IsXmlns</c> are always recorded in
        /// the xmlns list; <paramref name="xmlnsPolicy"/> then decides whether the
        /// call throws (Fatal), stops there (AlterInfoset) or also appends the
        /// attribute to the regular list (Allow).
        /// </summary>
        /// <exception cref="Exception">An xmlns attribute was added under the Fatal policy.</exception>
        internal void AddAttribute(AttributeName name, string value
            // [NOCPP[
            , XmlViolationPolicy xmlnsPolicy
            // ]NOCPP]
        )
        {
            // [NOCPP[
            if (name == AttributeName.ID)
            {
                idValue = value;
            }

            if (name.IsXmlns)
            {
                if (xmlnsNames.Length == xmlnsLength)
                {
                    // The xmlns arrays start out as shared empty arrays, so the
                    // first growth has to jump to a non-zero capacity.
                    int newXmlnsLen = xmlnsLength == 0 ? 2 : xmlnsLength << 1;
                    Array.Resize(ref xmlnsNames, newXmlnsLen);
                    Array.Resize(ref xmlnsValues, newXmlnsLen);
                }
                xmlnsNames[xmlnsLength] = name;
                xmlnsValues[xmlnsLength] = value;
                xmlnsLength++;
                switch (xmlnsPolicy)
                {
                    case XmlViolationPolicy.Fatal:
                        // this is ugly (TODO)
                        throw new Exception("Saw an xmlns attribute.");
                    case XmlViolationPolicy.AlterInfoset:
                        return;
                    case XmlViolationPolicy.Allow:
                        break; // fall through
                }
            }

            // ]NOCPP]

            if (names.Length == length)
            {
                int newLen = length << 1; // The first growth covers virtually
                // 100% of elements according to
                // Hixie
                Array.Resize(ref names, newLen);
                Array.Resize(ref values, newLen);
            }
            names[length] = name;
            values[length] = value;
            length++;
        }

        /// <summary>
        /// Removes all attributes and xmlns entries, forgets the id value and
        /// switches to mode <paramref name="m"/>. Array capacity is retained.
        /// </summary>
        internal void Clear(int m)
        {
            Array.Clear(names, 0, length);
            Array.Clear(values, 0, length);
            length = 0;
            mode = m;
            // [NOCPP[
            idValue = null;
            Array.Clear(xmlnsNames, 0, xmlnsLength);
            Array.Clear(xmlnsValues, 0, xmlnsLength);
            xmlnsLength = 0;
            // ]NOCPP]
        }

        /// <summary>
        /// This is only used for AttributeName ownership transfer
        /// in the isindex case to avoid freeing custom names twice in C++.
        /// Only the regular list is emptied; the id value and xmlns list are untouched.
        /// </summary>
        internal void ClearWithoutReleasingContents()
        {
            Array.Clear(names, 0, length);
            Array.Clear(values, 0, length);
            length = 0;
        }

        /// <summary>
        /// Determines whether an attribute equal to <paramref name="name"/>
        /// (per <c>AttributeName.EqualsAnother</c>) exists in either the regular
        /// list or the xmlns list.
        /// </summary>
        public bool Contains(AttributeName name)
        {
            for (int i = 0; i < length; i++)
            {
                if (name.EqualsAnother(names[i]))
                {
                    return true;
                }
            }
            // [NOCPP[
            for (int i = 0; i < xmlnsLength; i++)
            {
                if (name.EqualsAnother(xmlnsNames[i]))
                {
                    return true;
                }
            }
            // ]NOCPP]
            return false;
        }

        /// <summary>
        /// Switches name resolution to <c>AttributeName.MATHML</c> mode.
        /// </summary>
        public void AdjustForMath()
        {
            mode = AttributeName.MATHML;
        }

        /// <summary>
        /// Switches name resolution to <c>AttributeName.SVG</c> mode.
        /// </summary>
        public void AdjustForSvg()
        {
            mode = AttributeName.SVG;
        }

        /// <summary>
        /// Creates a mode-0 copy of this holder.
        /// </summary>
        public HtmlAttributes CloneAttributes()
        {
            Debug.Assert((length == 0 && xmlnsLength == 0) || mode == 0 || mode == 3);
            HtmlAttributes clone = new HtmlAttributes(0);
            for (int i = 0; i < length; i++)
            {
                clone.AddAttribute(names[i].CloneAttributeName(), values[i]
                    // [NOCPP[
                    , XmlViolationPolicy.Allow
                    // ]NOCPP]
                );
            }
            // [NOCPP[
            for (int i = 0; i < xmlnsLength; i++)
            {
                // AlterInfoset records the entry in the clone's xmlns list only.
                // Using Allow here would also append it to the clone's regular
                // attribute list, which the loop above has already populated.
                clone.AddAttribute(xmlnsNames[i],
                    xmlnsValues[i], XmlViolationPolicy.AlterInfoset);
            }
            // ]NOCPP]
            return clone; // XXX!!!
        }

        /// <summary>
        /// Order-insensitive comparison of the regular attributes: both holders
        /// must have the same count, and every attribute here must have a
        /// same-named attribute in <paramref name="other"/> with an equal value.
        /// </summary>
        /// <returns>false when <paramref name="other"/> is null or differs.</returns>
        public bool Equals(HtmlAttributes other)
        {
            if (other == null)
            {
                return false;
            }
            Debug.Assert(mode == 0 || mode == 3, "Trying to compare attributes in foreign content.");
            int otherLength = other.Length;
            if (length != otherLength)
            {
                return false;
            }
            for (int i = 0; i < length; i++)
            {
                // Work around the limitations of C++
                bool found = false;
                // Comparing just the local names is OK, since these attribute
                // holders are both supposed to belong to HTML formatting elements
                /*[Local]*/
                string ownLocal = names[i].GetLocal(AttributeName.HTML);
                for (int j = 0; j < otherLength; j++)
                {
                    if (ownLocal == other.names[j].GetLocal(AttributeName.HTML))
                    {
                        found = true;
                        if (values[i] != other.values[j])
                        {
                            return false;
                        }
                    }
                }
                if (!found)
                {
                    return false;
                }
            }
            return true;
        }

        // [NOCPP[

        /// <summary>
        /// Applies <paramref name="namePolicy"/> to every attribute whose name is
        /// not an NCName in the current mode: AlterInfoset replaces the name with
        /// its escaped form and then warns like Allow; Allow warns (except for
        /// <c>AttributeName.XML_LANG</c>); Fatal reports a fatal error.
        /// </summary>
        internal void ProcessNonNcNames<T>(TreeBuilder<T> treeBuilder, XmlViolationPolicy namePolicy) where T : class
        {
            for (int i = 0; i < length; i++)
            {
                AttributeName attName = names[i];
                if (!attName.IsNcName(mode))
                {
                    string name = attName.GetLocal(mode);
                    switch (namePolicy)
                    {
                        case XmlViolationPolicy.AlterInfoset:
                            names[i] = AttributeName.Create(NCName.EscapeName(name));
                            goto case XmlViolationPolicy.Allow; // fall through
                        case XmlViolationPolicy.Allow:
                            if (attName != AttributeName.XML_LANG)
                            {
                                treeBuilder.Warn("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0.");
                            }
                            break;
                        case XmlViolationPolicy.Fatal:
                            treeBuilder.Fatal("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0.");
                            break;
                    }
                }
            }
        }

        /// <summary>
        /// Adds every regular attribute of <paramref name="attributes"/> that this
        /// holder does not already contain (see <see cref="Contains"/>).
        /// </summary>
        public void Merge(HtmlAttributes attributes)
        {
            int len = attributes.Length;
            for (int i = 0; i < len; i++)
            {
                AttributeName name = attributes.GetAttributeName(i);
                if (!Contains(name))
                {
                    AddAttribute(name, attributes.GetValue(i), XmlViolationPolicy.Allow);
                }
            }
        }

        // ]NOCPP]
    }
}
-------------------------------------------------------------------------------- /HtmlParserSharp/Core/ILocator.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Patrick Reisert 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | namespace HtmlParserSharp.Core 24 | { 25 | /// 26 | /// Interface for getting the current line and column 27 | /// (Corresponds to the SAX Locator interface). 28 | /// This is implemented by Locator and Tokenizer.
29 | /// 30 | public interface ILocator 31 | { 32 | int LineNumber { get; } 33 | int ColumnNumber { get; } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /HtmlParserSharp/Core/ITreeBuilderState.cs: --------------------------------------------------------------------------------
/*
 * Copyright (c) 2009-2010 Mozilla Foundation
 * Copyright (c) 2012 Patrick Reisert
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

namespace HtmlParserSharp.Core
{
    /// <summary>
    /// Interface for exposing the state of the HTML5 tree builder so that the
    /// interface can be implemented by the tree builder itself and by snapshots.
    /// </summary>
    /// <typeparam name="T">
    /// The reference type used for the form, head and deep tree surrogate
    /// parent pointers (presumably the tree's node type; confirm against
    /// the implementing tree builder).
    /// </typeparam>
    public interface ITreeBuilderState<T> where T : class
    {
        /// <summary>
        /// Gets the stack.
        /// </summary>
        /// <value>The stack</value>
        StackNode<T>[] Stack { get; }

        /// <summary>
        /// Gets the list of active formatting elements.
        /// </summary>
        /// <value>The list of active formatting elements.</value>
        StackNode<T>[] ListOfActiveFormattingElements { get; }

        /// <summary>
        /// Gets the form pointer.
        /// </summary>
        /// <value>The form pointer</value>
        T FormPointer { get; }

        /// <summary>
        /// Gets the head pointer.
        /// </summary>
        /// <value>The head pointer.</value>
        T HeadPointer { get; }

        /// <summary>
        /// Gets the deep tree surrogate parent.
        /// </summary>
        /// <value>The deep tree surrogate parent.</value>
        T DeepTreeSurrogateParent { get; }

        /// <summary>
        /// Gets the mode.
        /// </summary>
        /// <value>The mode.</value>
        InsertionMode Mode { get; }

        /// <summary>
        /// Gets the original mode.
        /// </summary>
        /// <value>The original mode.</value>
        InsertionMode OriginalMode { get; }

        /// <summary>
        /// Determines whether the frameset is OK.
        /// </summary>
        /// <value>
        /// <c>true</c> if the frameset is OK; otherwise, <c>false</c>.
        /// </value>
        bool IsFramesetOk { get; }

        /// <summary>
        /// Determines whether we need to drop LF.
        /// </summary>
        /// <value>
        /// <c>true</c> if we need to drop LF; otherwise, <c>false</c>.
        /// </value>
        bool IsNeedToDropLF { get; }

        /// <summary>
        /// Determines whether this instance is in quirks mode.
        /// </summary>
        /// <value>
        /// <c>true</c> if this instance is in quirks mode; otherwise, <c>false</c>.
        /// </value>
        bool IsQuirks { get; }
    }
}
-------------------------------------------------------------------------------- /HtmlParserSharp/Core/InsertionMode.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2007-2011 Mozilla Foundation 4 | * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla 5 | * Foundation, and Opera Software ASA.
6 | * Copyright (c) 2012 Patrick Reisert 7 | * 8 | * Permission is hereby granted, free of charge, to any person obtaining a 9 | * copy of this software and associated documentation files (the "Software"), 10 | * to deal in the Software without restriction, including without limitation 11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 | * and/or sell copies of the Software, and to permit persons to whom the 13 | * Software is furnished to do so, subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be included in 16 | * all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | * DEALINGS IN THE SOFTWARE. 
25 | */

namespace HtmlParserSharp.Core
{
	/// <summary>
	/// The modes a tree builder state can report through its <c>Mode</c> and
	/// <c>OriginalMode</c> properties.
	/// </summary>
	/// <remarks>
	/// Every member carries an explicit numeric value (0 through 21, in
	/// declaration order); the values are spelled out rather than left implicit
	/// so that inserting or reordering members cannot silently renumber them.
	/// </remarks>
	public enum InsertionMode
	{
		// Document prologue and head handling.
		INITIAL = 0,
		BEFORE_HTML = 1,
		BEFORE_HEAD = 2,
		IN_HEAD = 3,
		IN_HEAD_NOSCRIPT = 4,
		AFTER_HEAD = 5,

		// Body content.
		IN_BODY = 6,

		// Table-related modes.
		IN_TABLE = 7,
		IN_CAPTION = 8,
		IN_COLUMN_GROUP = 9,
		IN_TABLE_BODY = 10,
		IN_ROW = 11,
		IN_CELL = 12,

		// Select handling.
		IN_SELECT = 13,
		IN_SELECT_IN_TABLE = 14,

		// Body / frameset epilogue.
		AFTER_BODY = 15,
		IN_FRAMESET = 16,
		AFTER_FRAMESET = 17,
		AFTER_AFTER_BODY = 18,
		AFTER_AFTER_FRAMESET = 19,

		TEXT = 20,

		// NOTE(review): presumably a body-like mode in which a frameset is still
		// permitted - confirm against the tree builder before relying on this.
		FRAMESET_OK = 21
	}
}
-------------------------------------------------------------------------------- /HtmlParserSharp/Core/Locator.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2011 Mozilla Foundation 4 | * Copyright (c) 2012 Patrick Reisert 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | * DEALINGS IN THE SOFTWARE. 23 | */

namespace HtmlParserSharp.Core
{
	/// <summary>
	/// An <see cref="ILocator"/> whose line and column are copied from another
	/// locator when the instance is constructed. Both setters are private and
	/// are only invoked from the constructor, so the position does not change
	/// afterwards even if the source locator moves on.
	/// </summary>
	public class Locator : ILocator
	{
		/// <summary>
		/// Gets the column number copied from the source locator.
		/// </summary>
		public int ColumnNumber { get; private set; }

		/// <summary>
		/// Gets the line number copied from the source locator.
		/// </summary>
		public int LineNumber { get; private set; }

		/// <summary>
		/// Initializes a new instance by copying the current position of
		/// <paramref name="locator"/>.
		/// </summary>
		/// <param name="locator">The locator whose position is copied.</param>
		/// <exception cref="System.ArgumentNullException">
		/// <paramref name="locator"/> is <c>null</c>.
		/// </exception>
		public Locator(ILocator locator)
		{
			// Fail with a descriptive exception instead of the bare
			// NullReferenceException the property reads below would raise.
			if (locator == null)
			{
				throw new System.ArgumentNullException("locator");
			}

			ColumnNumber = locator.ColumnNumber;
			LineNumber = locator.LineNumber;
		}
	}
}
-------------------------------------------------------------------------------- /HtmlParserSharp/Core/NCName.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2008-2009 Mozilla Foundation 3 | * Copyright (c) 2012 Patrick Reisert 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | * DEALINGS IN THE SOFTWARE. 22 | */

using System;
using System.Text;

namespace HtmlParserSharp.Core
{
	/// <summary>
	/// Character-class predicates for NCNames plus a helper that escapes an
	/// arbitrary string into a valid NCName.
	/// </summary>
	public sealed class NCName
	{
		// [NOCPP[

		// Added to (high << 10) + low to turn a UTF-16 surrogate pair into the
		// code point it encodes.
		private const int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;

		private static readonly char[] HEX_TABLE = "0123456789ABCDEF".ToCharArray();

		/// <summary>
		/// Determines whether <paramref name="c"/> may be the first character of
		/// an NCName: one of the letter/ideograph ranges below or an underscore.
		/// </summary>
		/// <param name="c">The UTF-16 code unit to classify.</param>
		/// <returns><c>true</c> if <paramref name="c"/> may start an NCName.</returns>
		public static bool IsNCNameStart(char c)
		{
			return ((c >= '\u0041' && c <= '\u005A')
				|| (c >= '\u0061' && c <= '\u007A')
				|| (c >= '\u00C0' && c <= '\u00D6')
				|| (c >= '\u00D8' && c <= '\u00F6')
				|| (c >= '\u00F8' && c <= '\u00FF')
				|| (c >= '\u0100' && c <= '\u0131')
				|| (c >= '\u0134' && c <= '\u013E')
				|| (c >= '\u0141' && c <= '\u0148')
				|| (c >= '\u014A' && c <= '\u017E')
				|| (c >= '\u0180' && c <= '\u01C3')
				|| (c >= '\u01CD' && c <= '\u01F0')
				|| (c >= '\u01F4' && c <= '\u01F5')
				|| (c >= '\u01FA' && c <= '\u0217')
				|| (c >= '\u0250' && c <= '\u02A8')
				|| (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386')
				|| (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
				|| (c >= '\u038E' && c <= '\u03A1')
				|| (c >= '\u03A3' && c <= '\u03CE')
				|| (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA')
				|| (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0')
				|| (c >= '\u03E2' && c <= '\u03F3')
				|| (c >= '\u0401' && c <= '\u040C')
				|| (c >= '\u040E' && c <= '\u044F')
				|| (c >= '\u0451' && c <= '\u045C')
				|| (c >= '\u045E' && c <= '\u0481')
				|| (c >= '\u0490' && c <= '\u04C4')
				|| (c >= '\u04C7' && c <= '\u04C8')
				|| (c >= '\u04CB' && c <= '\u04CC')
				|| (c >= '\u04D0' && c <= '\u04EB')
				|| (c >= '\u04EE' && c <= '\u04F5')
				|| (c >= '\u04F8' && c <= '\u04F9')
				|| (c >= '\u0531' && c <= '\u0556') || (c == '\u0559')
				|| (c >= '\u0561' && c <= '\u0586')
				|| (c >= '\u05D0' && c <= '\u05EA')
				|| (c >= '\u05F0' && c <= '\u05F2')
				|| (c >= '\u0621' && c <= '\u063A')
				|| (c >= '\u0641' && c <= '\u064A')
				|| (c >= '\u0671' && c <= '\u06B7')
				|| (c >= '\u06BA' && c <= '\u06BE')
				|| (c >= '\u06C0' && c <= '\u06CE')
				|| (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5')
				|| (c >= '\u06E5' && c <= '\u06E6')
				|| (c >= '\u0905' && c <= '\u0939') || (c == '\u093D')
				|| (c >= '\u0958' && c <= '\u0961')
				|| (c >= '\u0985' && c <= '\u098C')
				|| (c >= '\u098F' && c <= '\u0990')
				|| (c >= '\u0993' && c <= '\u09A8')
				|| (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2')
				|| (c >= '\u09B6' && c <= '\u09B9')
				|| (c >= '\u09DC' && c <= '\u09DD')
				|| (c >= '\u09DF' && c <= '\u09E1')
				|| (c >= '\u09F0' && c <= '\u09F1')
				|| (c >= '\u0A05' && c <= '\u0A0A')
				|| (c >= '\u0A0F' && c <= '\u0A10')
				|| (c >= '\u0A13' && c <= '\u0A28')
				|| (c >= '\u0A2A' && c <= '\u0A30')
				|| (c >= '\u0A32' && c <= '\u0A33')
				|| (c >= '\u0A35' && c <= '\u0A36')
				|| (c >= '\u0A38' && c <= '\u0A39')
				|| (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E')
				|| (c >= '\u0A72' && c <= '\u0A74')
				|| (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D')
				|| (c >= '\u0A8F' && c <= '\u0A91')
				|| (c >= '\u0A93' && c <= '\u0AA8')
				|| (c >= '\u0AAA' && c <= '\u0AB0')
				|| (c >= '\u0AB2' && c <= '\u0AB3')
				|| (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD')
				|| (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C')
				|| (c >= '\u0B0F' && c <= '\u0B10')
				|| (c >= '\u0B13' && c <= '\u0B28')
				|| (c >= '\u0B2A' && c <= '\u0B30')
				|| (c >= '\u0B32' && c <= '\u0B33')
				|| (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D')
				|| (c >= '\u0B5C' && c <= '\u0B5D')
				|| (c >= '\u0B5F' && c <= '\u0B61')
				|| (c >= '\u0B85' && c <= '\u0B8A')
				|| (c >= '\u0B8E' && c <= '\u0B90')
				|| (c >= '\u0B92' && c <= '\u0B95')
				|| (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C')
				|| (c >= '\u0B9E' && c <= '\u0B9F')
				|| (c >= '\u0BA3' && c <= '\u0BA4')
				|| (c >= '\u0BA8' && c <= '\u0BAA')
				|| (c >= '\u0BAE' && c <= '\u0BB5')
				|| (c >= '\u0BB7' && c <= '\u0BB9')
				|| (c >= '\u0C05' && c <= '\u0C0C')
				|| (c >= '\u0C0E' && c <= '\u0C10')
				|| (c >= '\u0C12' && c <= '\u0C28')
				|| (c >= '\u0C2A' && c <= '\u0C33')
				|| (c >= '\u0C35' && c <= '\u0C39')
				|| (c >= '\u0C60' && c <= '\u0C61')
				|| (c >= '\u0C85' && c <= '\u0C8C')
				|| (c >= '\u0C8E' && c <= '\u0C90')
				|| (c >= '\u0C92' && c <= '\u0CA8')
				|| (c >= '\u0CAA' && c <= '\u0CB3')
				|| (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE')
				|| (c >= '\u0CE0' && c <= '\u0CE1')
				|| (c >= '\u0D05' && c <= '\u0D0C')
				|| (c >= '\u0D0E' && c <= '\u0D10')
				|| (c >= '\u0D12' && c <= '\u0D28')
				|| (c >= '\u0D2A' && c <= '\u0D39')
				|| (c >= '\u0D60' && c <= '\u0D61')
				|| (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30')
				|| (c >= '\u0E32' && c <= '\u0E33')
				|| (c >= '\u0E40' && c <= '\u0E45')
				|| (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84')
				|| (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A')
				|| (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97')
				|| (c >= '\u0E99' && c <= '\u0E9F')
				|| (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5')
				|| (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB')
				|| (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0')
				|| (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD')
				|| (c >= '\u0EC0' && c <= '\u0EC4')
				|| (c >= '\u0F40' && c <= '\u0F47')
				|| (c >= '\u0F49' && c <= '\u0F69')
				|| (c >= '\u10A0' && c <= '\u10C5')
				|| (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100')
				|| (c >= '\u1102' && c <= '\u1103')
				|| (c >= '\u1105' && c <= '\u1107') || (c == '\u1109')
				|| (c >= '\u110B' && c <= '\u110C')
				|| (c >= '\u110E' && c <= '\u1112') || (c == '\u113C')
				|| (c == '\u113E') || (c == '\u1140') || (c == '\u114C')
				|| (c == '\u114E') || (c == '\u1150')
				|| (c >= '\u1154' && c <= '\u1155') || (c == '\u1159')
				|| (c >= '\u115F' && c <= '\u1161') || (c == '\u1163')
				|| (c == '\u1165') || (c == '\u1167') || (c == '\u1169')
				|| (c >= '\u116D' && c <= '\u116E')
				|| (c >= '\u1172' && c <= '\u1173') || (c == '\u1175')
				|| (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB')
				|| (c >= '\u11AE' && c <= '\u11AF')
				|| (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA')
				|| (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB')
				|| (c == '\u11F0') || (c == '\u11F9')
				|| (c >= '\u1E00' && c <= '\u1E9B')
				|| (c >= '\u1EA0' && c <= '\u1EF9')
				|| (c >= '\u1F00' && c <= '\u1F15')
				|| (c >= '\u1F18' && c <= '\u1F1D')
				|| (c >= '\u1F20' && c <= '\u1F45')
				|| (c >= '\u1F48' && c <= '\u1F4D')
				|| (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59')
				|| (c == '\u1F5B') || (c == '\u1F5D')
				|| (c >= '\u1F5F' && c <= '\u1F7D')
				|| (c >= '\u1F80' && c <= '\u1FB4')
				|| (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE')
				|| (c >= '\u1FC2' && c <= '\u1FC4')
				|| (c >= '\u1FC6' && c <= '\u1FCC')
				|| (c >= '\u1FD0' && c <= '\u1FD3')
				|| (c >= '\u1FD6' && c <= '\u1FDB')
				|| (c >= '\u1FE0' && c <= '\u1FEC')
				|| (c >= '\u1FF2' && c <= '\u1FF4')
				|| (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126')
				|| (c >= '\u212A' && c <= '\u212B') || (c == '\u212E')
				|| (c >= '\u2180' && c <= '\u2182')
				|| (c >= '\u3041' && c <= '\u3094')
				|| (c >= '\u30A1' && c <= '\u30FA')
				|| (c >= '\u3105' && c <= '\u312C')
				|| (c >= '\uAC00' && c <= '\uD7A3')
				|| (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007')
				|| (c >= '\u3021' && c <= '\u3029') || (c == '_'));
		}

		/// <summary>
		/// Determines whether <paramref name="c"/> may appear after the first
		/// character of an NCName: every start character, plus digits, '.', '-',
		/// combining marks and extenders.
		/// </summary>
		/// <param name="c">The UTF-16 code unit to classify.</param>
		/// <returns><c>true</c> if <paramref name="c"/> may continue an NCName.</returns>
		public static bool IsNCNameTrail(char c)
		{
			// The start-character set is a strict subset of the trail set, so it
			// is reused here instead of repeating the whole table.
			return IsNCNameStart(c)
				|| IsDigitChar(c)
				|| (c == '.') || (c == '-')
				|| IsCombiningChar(c)
				|| IsExtenderChar(c);
		}

		// Decimal digit ranges accepted in trail position
		// (these appear to follow the XML 1.0 "Digit" production).
		private static bool IsDigitChar(char c)
		{
			return ((c >= '\u0030' && c <= '\u0039')
				|| (c >= '\u0660' && c <= '\u0669')
				|| (c >= '\u06F0' && c <= '\u06F9')
				|| (c >= '\u0966' && c <= '\u096F')
				|| (c >= '\u09E6' && c <= '\u09EF')
				|| (c >= '\u0A66' && c <= '\u0A6F')
				|| (c >= '\u0AE6' && c <= '\u0AEF')
				|| (c >= '\u0B66' && c <= '\u0B6F')
				|| (c >= '\u0BE7' && c <= '\u0BEF')
				|| (c >= '\u0C66' && c <= '\u0C6F')
				|| (c >= '\u0CE6' && c <= '\u0CEF')
				|| (c >= '\u0D66' && c <= '\u0D6F')
				|| (c >= '\u0E50' && c <= '\u0E59')
				|| (c >= '\u0ED0' && c <= '\u0ED9')
				|| (c >= '\u0F20' && c <= '\u0F29'));
		}

		// Combining marks accepted in trail position
		// (these appear to follow the XML 1.0 "CombiningChar" production).
		private static bool IsCombiningChar(char c)
		{
			return ((c >= '\u0300' && c <= '\u0345')
				|| (c >= '\u0360' && c <= '\u0361')
				|| (c >= '\u0483' && c <= '\u0486')
				|| (c >= '\u0591' && c <= '\u05A1')
				|| (c >= '\u05A3' && c <= '\u05B9')
				|| (c >= '\u05BB' && c <= '\u05BD') || (c == '\u05BF')
				|| (c >= '\u05C1' && c <= '\u05C2') || (c == '\u05C4')
				|| (c >= '\u064B' && c <= '\u0652') || (c == '\u0670')
				|| (c >= '\u06D6' && c <= '\u06DC')
				|| (c >= '\u06DD' && c <= '\u06DF')
				|| (c >= '\u06E0' && c <= '\u06E4')
				|| (c >= '\u06E7' && c <= '\u06E8')
				|| (c >= '\u06EA' && c <= '\u06ED')
				|| (c >= '\u0901' && c <= '\u0903') || (c == '\u093C')
				|| (c >= '\u093E' && c <= '\u094C') || (c == '\u094D')
				|| (c >= '\u0951' && c <= '\u0954')
				|| (c >= '\u0962' && c <= '\u0963')
				|| (c >= '\u0981' && c <= '\u0983') || (c == '\u09BC')
				|| (c == '\u09BE') || (c == '\u09BF')
				|| (c >= '\u09C0' && c <= '\u09C4')
				|| (c >= '\u09C7' && c <= '\u09C8')
				|| (c >= '\u09CB' && c <= '\u09CD') || (c == '\u09D7')
				|| (c >= '\u09E2' && c <= '\u09E3') || (c == '\u0A02')
				|| (c == '\u0A3C') || (c == '\u0A3E') || (c == '\u0A3F')
				|| (c >= '\u0A40' && c <= '\u0A42')
				|| (c >= '\u0A47' && c <= '\u0A48')
				|| (c >= '\u0A4B' && c <= '\u0A4D')
				|| (c >= '\u0A70' && c <= '\u0A71')
				|| (c >= '\u0A81' && c <= '\u0A83') || (c == '\u0ABC')
				|| (c >= '\u0ABE' && c <= '\u0AC5')
				|| (c >= '\u0AC7' && c <= '\u0AC9')
				|| (c >= '\u0ACB' && c <= '\u0ACD')
				|| (c >= '\u0B01' && c <= '\u0B03') || (c == '\u0B3C')
				|| (c >= '\u0B3E' && c <= '\u0B43')
				|| (c >= '\u0B47' && c <= '\u0B48')
				|| (c >= '\u0B4B' && c <= '\u0B4D')
				|| (c >= '\u0B56' && c <= '\u0B57')
				|| (c >= '\u0B82' && c <= '\u0B83')
				|| (c >= '\u0BBE' && c <= '\u0BC2')
				|| (c >= '\u0BC6' && c <= '\u0BC8')
				|| (c >= '\u0BCA' && c <= '\u0BCD') || (c == '\u0BD7')
				|| (c >= '\u0C01' && c <= '\u0C03')
				|| (c >= '\u0C3E' && c <= '\u0C44')
				|| (c >= '\u0C46' && c <= '\u0C48')
				|| (c >= '\u0C4A' && c <= '\u0C4D')
				|| (c >= '\u0C55' && c <= '\u0C56')
				|| (c >= '\u0C82' && c <= '\u0C83')
				|| (c >= '\u0CBE' && c <= '\u0CC4')
				|| (c >= '\u0CC6' && c <= '\u0CC8')
				|| (c >= '\u0CCA' && c <= '\u0CCD')
				|| (c >= '\u0CD5' && c <= '\u0CD6')
				|| (c >= '\u0D02' && c <= '\u0D03')
				|| (c >= '\u0D3E' && c <= '\u0D43')
				|| (c >= '\u0D46' && c <= '\u0D48')
				|| (c >= '\u0D4A' && c <= '\u0D4D') || (c == '\u0D57')
				|| (c == '\u0E31') || (c >= '\u0E34' && c <= '\u0E3A')
				|| (c >= '\u0E47' && c <= '\u0E4E') || (c == '\u0EB1')
				|| (c >= '\u0EB4' && c <= '\u0EB9')
				|| (c >= '\u0EBB' && c <= '\u0EBC')
				|| (c >= '\u0EC8' && c <= '\u0ECD')
				|| (c >= '\u0F18' && c <= '\u0F19') || (c == '\u0F35')
				|| (c == '\u0F37') || (c == '\u0F39') || (c == '\u0F3E')
				|| (c == '\u0F3F') || (c >= '\u0F71' && c <= '\u0F84')
				|| (c >= '\u0F86' && c <= '\u0F8B')
				|| (c >= '\u0F90' && c <= '\u0F95') || (c == '\u0F97')
				|| (c >= '\u0F99' && c <= '\u0FAD')
				|| (c >= '\u0FB1' && c <= '\u0FB7') || (c == '\u0FB9')
				|| (c >= '\u20D0' && c <= '\u20DC') || (c == '\u20E1')
				|| (c >= '\u302A' && c <= '\u302F') || (c == '\u3099')
				|| (c == '\u309A'));
		}

		// Extender characters accepted in trail position
		// (these appear to follow the XML 1.0 "Extender" production).
		private static bool IsExtenderChar(char c)
		{
			return ((c == '\u00B7') || (c == '\u02D0')
				|| (c == '\u02D1') || (c == '\u0387') || (c == '\u0640')
				|| (c == '\u0E46') || (c == '\u0EC6') || (c == '\u3005')
				|| (c >= '\u3031' && c <= '\u3035')
				|| (c >= '\u309D' && c <= '\u309E')
				|| (c >= '\u30FC' && c <= '\u30FE'));
		}

		/// <summary>
		/// Determines whether <paramref name="str"/> is an NCName: non-empty,
		/// first character accepted by <see cref="IsNCNameStart"/>, and every
		/// following character accepted by <see cref="IsNCNameTrail"/>.
		/// </summary>
		/// <param name="str">The candidate name; may be <c>null</c>.</param>
		/// <returns>
		/// <c>true</c> if the string is an NCName; <c>false</c> otherwise,
		/// including for <c>null</c> and the empty string.
		/// </returns>
		public static bool IsNCName(string str)
		{
			if (string.IsNullOrEmpty(str) || !IsNCNameStart(str[0]))
			{
				return false;
			}

			for (int i = 1; i < str.Length; i++)
			{
				if (!IsNCNameTrail(str[i]))
				{
					return false;
				}
			}

			return true;
		}

		// Appends 'U' followed by the low 24 bits of c as six upper-case hex
		// digits, most significant nibble first.
		private static void AppendUHexTo(StringBuilder sb, int c)
		{
			sb.Append('U');
			for (int i = 0; i < 6; i++)
			{
				sb.Append(HEX_TABLE[(c & 0xF00000) >> 20]);
				c <<= 4;
			}
		}

		/// <summary>
		/// Escapes <paramref name="str"/> so that the result contains only NCName
		/// characters. Characters that are not allowed at their position, and
		/// all supplementary-plane characters, are replaced by <c>U</c> followed
		/// by six upper-case hex digits of the code point.
		/// </summary>
		/// <param name="str">The name to escape.</param>
		/// <returns>The interned, escaped name.</returns>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="str"/> is <c>null</c>.
		/// </exception>
		public static string EscapeName(string str)
		{
			if (str == null)
			{
				throw new ArgumentNullException("str");
			}

			StringBuilder sb = new StringBuilder();
			for (int i = 0; i < str.Length; i++)
			{
				char c = str[i];

				// Combine a surrogate pair only when a low surrogate really
				// follows. An unpaired high surrogate (e.g. at the end of the
				// string) used to index past the end of the string; it now falls
				// through and is escaped as a single code unit.
				bool isPair = (c & 0xFC00) == 0xD800
					&& i + 1 < str.Length
					&& (str[i + 1] & 0xFC00) == 0xDC00;

				if (isPair)
				{
					char next = str[++i];
					AppendUHexTo(sb, (c << 10) + next + SURROGATE_OFFSET);
				}
				else if (i == 0 ? !IsNCNameStart(c) : !IsNCNameTrail(c))
				{
					AppendUHexTo(sb, c);
				}
				else
				{
					sb.Append(c);
				}
			}
			return String.Intern(sb.ToString());
		}
		// ]NOCPP]
	}

}
-------------------------------------------------------------------------------- /HtmlParserSharp/Core/NamedCharacterAccel.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera 3 | * Software ASA. 4 | * 5 | * You are granted a license to use, reproduce and create derivative works of 6 | * this document.
7 | */ 8 | 9 | namespace HtmlParserSharp.Core 10 | { 11 | public sealed class NamedCharactersAccel 12 | { 13 | internal static readonly int[][] HILO_ACCEL = new int[][] { 14 | null, 15 | null, 16 | null, 17 | null, 18 | null, 19 | null, 20 | null, 21 | null, 22 | null, 23 | null, 24 | null, 25 | null, 26 | null, 27 | null, 28 | null, 29 | null, 30 | null, 31 | null, 32 | null, 33 | null, 34 | null, 35 | null, 36 | null, 37 | null, 38 | null, 39 | null, 40 | null, 41 | null, 42 | null, 43 | null, 44 | null, 45 | null, 46 | null, 47 | null, 48 | null, 49 | null, 50 | null, 51 | null, 52 | null, 53 | null, 54 | null, 55 | null, 56 | null, 57 | null, 58 | null, 59 | null, 60 | null, 61 | null, 62 | null, 63 | null, 64 | null, 65 | null, 66 | null, 67 | null, 68 | null, 69 | null, 70 | null, 71 | null, 72 | null, 73 | null, 74 | null, 75 | null, 76 | null, 77 | null, 78 | null, 79 | new int[] { 0, 0, 0, 0, 0, 0, 0, 12386493, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80 | 0, 0, 0, 0, 0, 40174181, 0, 0, 0, 0, 60162966, 0, 0, 0, 81 | 75367550, 0, 0, 0, 82183396, 0, 0, 0, 0, 0, 115148507, 0, 82 | 0, 135989275, 139397199, 0, 0, 0, 0, }, 83 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28770743, 0, 84 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85 | 82248935, 0, 0, 0, 0, 0, 115214046, 0, 0, 0, 139528272, 0, 86 | 0, 0, 0, }, 87 | null, 88 | new int[] { 0, 0, 0, 4980811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89 | 0, 38470219, 0, 0, 0, 0, 0, 0, 0, 0, 64553944, 0, 0, 0, 0, 90 | 0, 0, 0, 92145022, 0, 0, 0, 0, 0, 0, 0, 0, 139593810, 0, 0, 91 | 0, 0, }, 92 | new int[] { 65536, 0, 0, 0, 0, 0, 0, 0, 13172937, 0, 0, 0, 0, 0, 25297282, 0, 93 | 0, 28901816, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94 | 71500866, 0, 0, 0, 0, 82380008, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95 | 0, 0, 0, 0, 0, }, 96 | null, 97 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99 | 94897574, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, }, 100 | new int[] { 0, 0, 2555943, 0, 0, 0, 0, 0, 0, 0, 15532269, 0, 0, 0, 0, 0, 0, 101 | 0, 31785444, 34406924, 0, 0, 0, 0, 0, 40895088, 0, 0, 0, 102 | 60228503, 0, 0, 0, 0, 0, 0, 0, 82445546, 0, 0, 0, 0, 0, 103 | 115279583, 0, 0, 136054812, 0, 0, 0, 0, 0, }, 104 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 105 | 0, 0, 40239718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 107 | new int[] { 0, 0, 0, 5046349, 0, 0, 10944679, 0, 13238474, 0, 15597806, 108 | 16056565, 0, 20578618, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 110 | 0, 0, 0, 0, 0, 0, }, 111 | null, 112 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114 | 95225257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 115 | new int[] { 196610, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 118 | new int[] { 0, 0, 0, 0, 8454273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119 | 0, 0, 0, 0, 0, 0, 0, 46072511, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 121 | new int[] { 0, 0, 2687016, 0, 0, 0, 0, 0, 13304011, 0, 0, 0, 0, 0, 0, 0, 0, 122 | 0, 31850982, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 124 | null, 125 | null, 126 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 | 34472462, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 | 0, 0, 0, 95290798, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 129 | new int[] { 0, 0, 0, 5111886, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 | 34603535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 131 | 0, 0, 0, 0, 105776718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 132 | new int[] { 0, 0, 0, 
0, 8585346, 0, 11075752, 0, 0, 0, 0, 16187638, 0, 0, 0, 133 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 134 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 135 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28508594, 0, 0, 136 | 0, 0, 0, 0, 0, 40305255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 137 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 138 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 140 | 95421871, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 141 | null, 142 | null, 143 | null, 144 | new int[] { 0, 0, 0, 5177423, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 145 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 146 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 147 | null, 148 | null, 149 | null, 150 | null, 151 | null, 152 | null, 153 | new int[] { 327684, 1900571, 2949162, 5374032, 8716420, 0, 11206826, 154 | 12517566, 13435084, 0, 15663343, 16515320, 19988785, 155 | 20644155, 25428355, 27197855, 0, 29163962, 31916519, 156 | 34734609, 36045347, 0, 0, 0, 40436328, 40960625, 41615994, 157 | 46596800, 54264627, 60556184, 64750554, 68879387, 71763012, 158 | 75826303, 77268122, 0, 81462490, 83952875, 92865919, 159 | 96142769, 105973327, 110167691, 0, 116917984, 121833283, 160 | 132253665, 136251421, 140707923, 0, 0, 144574620, 161 | 145361066, }, 162 | new int[] { 393222, 0, 0, 0, 0, 0, 11272364, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 163 | 0, 0, 36176423, 38535756, 0, 0, 0, 0, 41681532, 46727880, 164 | 0, 60687261, 0, 0, 71828552, 75891846, 0, 0, 0, 84411650, 165 | 0, 96404924, 0, 0, 0, 117376761, 121898820, 132319203, 166 | 136382496, 0, 0, 0, 0, 0, }, 167 | new int[] { 589831, 1966110, 3276846, 5505107, 8978566, 10420383, 11468973, 168 | 12583104, 13631694, 15139046, 15794416, 16711933, 20054322, 169 | 20840764, 25624965, 27263392, 0, 29360574, 32244200, 170 | 34931219, 36373033, 38601293, 39584348, 0, 40567402, 171 | 41091698, 
42205821, 46858954, 54723389, 60818335, 65143773, 172 | 68944924, 71959625, 75957383, 77530268, 80938194, 81593564, 173 | 84739337, 92997002, 96863680, 106235474, 110233234, 0, 174 | 117704448, 122816325, 132515812, 136579106, 140773476, 175 | 142149753, 143001732, 144705695, 145492139, }, 176 | new int[] { 0, 0, 3342387, 0, 9044106, 0, 11534512, 0, 13697233, 0, 0, 0, 0, 177 | 0, 25690504, 0, 0, 0, 0, 0, 36438572, 38732366, 0, 0, 0, 178 | 41157236, 0, 46924492, 54788932, 61080481, 65209315, 0, 179 | 72025163, 0, 0, 0, 0, 85132558, 93062540, 96929223, 180 | 106563158, 0, 0, 118032133, 123012947, 132581351, 181 | 136775717, 140839013, 0, 143067271, 0, 145557677, }, 182 | new int[] { 0, 2162719, 3473460, 5636181, 0, 0, 0, 0, 0, 0, 0, 18809088, 183 | 20185395, 21299519, 0, 0, 0, 29622721, 0, 0, 0, 39256656, 184 | 39649885, 0, 0, 41288309, 42336901, 47448781, 55182149, 185 | 61342629, 65274852, 69010461, 72811596, 76219528, 77726880, 186 | 0, 0, 86967572, 93128077, 97650120, 106628699, 110560915, 187 | 0, 118490890, 123733846, 132646888, 0, 141232230, 188 | 142411898, 0, 144836769, 145688750, }, 189 | new int[] { 655370, 2228258, 3538998, 5701719, 9109643, 10485920, 11600049, 190 | 12648641, 13762770, 15204584, 15859954, 18874656, 20250933, 191 | 21365062, 25756041, 27328929, 28574132, 29688261, 32309741, 192 | 34996758, 36504109, 39322200, 39715422, 39912033, 40632940, 193 | 41353847, 42467975, 47514325, 55247691, 61473705, 65405925, 194 | 69272606, 72877144, 76285068, 77857955, 81003732, 81659102, 195 | 87164208, 93193614, 97715667, 106759772, 110626456, 196 | 114296528, 118687505, 123864929, 132712425, 136906792, 197 | 141297772, 142477438, 143132808, 144902307, 145754288, }, 198 | new int[] { 786443, 0, 0, 0, 9240716, 0, 11665586, 0, 13893843, 0, 0, 0, 0, 199 | 0, 25887114, 0, 0, 0, 0, 0, 36635182, 0, 0, 0, 0, 0, 200 | 42599049, 0, 0, 0, 65733607, 0, 73008217, 0, 77989029, 0, 201 | 81724639, 87295283, 0, 98305492, 107021918, 0, 0, 0, 0, 0, 202 | 137037866, 0, 0, 0, 
0, 0, }, 203 | new int[] { 0, 0, 3604535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27394466, 0, 204 | 29753798, 32571886, 35258903, 0, 0, 0, 0, 0, 0, 0, 0, 205 | 55509836, 61604779, 0, 0, 0, 0, 0, 0, 81790176, 87557429, 206 | 93259151, 98502109, 107152994, 110888601, 0, 119015188, 207 | 124323683, 133498858, 137234476, 0, 0, 143263881, 0, 208 | 145819825, }, 209 | new int[] { 0, 0, 3866680, 6160472, 0, 10616993, 0, 12714178, 0, 0, 0, 0, 210 | 20316470, 0, 0, 27460003, 0, 31261127, 32637426, 35521051, 211 | 0, 0, 0, 39977570, 0, 0, 0, 48366294, 56492880, 62391213, 212 | 0, 69338146, 73073755, 0, 78316711, 0, 0, 0, 93980048, 213 | 98764256, 107218532, 111085213, 114362065, 119736089, 214 | 125241194, 133957622, 0, 0, 0, 143329419, 144967844, 215 | 145885362, }, 216 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 217 | 0, 0, 0, 0, 0, 0, 0, 62456761, 0, 69403683, 73139292, 0, 218 | 78382252, 0, 81855713, 87622969, 0, 98829796, 0, 0, 0, 0, 219 | 0, 0, 0, 0, 0, 0, 0, 0, }, 220 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 221 | 0, 0, 0, 0, 0, 48431843, 0, 0, 0, 0, 0, 76416141, 0, 0, 0, 222 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 223 | new int[] { 851981, 0, 4063292, 0, 9306254, 0, 0, 0, 0, 0, 0, 19005729, 0, 0, 224 | 0, 27525540, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42795659, 225 | 49152740, 56623967, 62587834, 66061292, 69600292, 73401437, 226 | 0, 0, 0, 0, 87950650, 94111131, 99878373, 107546213, 227 | 112002720, 0, 119932708, 125306744, 0, 137496623, 228 | 141363309, 0, 143460492, 0, 0, }, 229 | new int[] { 917518, 0, 0, 0, 9502863, 0, 0, 0, 14155989, 0, 0, 19071267, 0, 230 | 0, 26083724, 0, 0, 0, 32702963, 0, 36700720, 0, 0, 0, 0, 0, 231 | 43057806, 0, 0, 0, 66520049, 0, 0, 0, 78841005, 81069269, 232 | 0, 88147263, 0, 99943925, 107873898, 112068270, 0, 233 | 120063783, 125831033, 0, 137693235, 0, 0, 143526030, 0, 0, }, 234 | new int[] { 983055, 0, 0, 0, 0, 0, 0, 0, 14483673, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
235 | 0, 0, 37093937, 0, 0, 0, 0, 0, 44565138, 49349359, 0, 0, 236 | 66651128, 69665831, 73860193, 0, 79561908, 0, 0, 88606018, 237 | 94176669, 0, 0, 0, 0, 120129321, 0, 0, 0, 141494382, 0, 238 | 143591567, 0, 0, }, 239 | new int[] { 1114128, 2293795, 4587583, 8257631, 9633938, 10813603, 11731123, 240 | 12845251, 14680286, 15270121, 15925491, 19661092, 20382007, 241 | 24969543, 26149263, 27656613, 28639669, 31392222, 32768500, 242 | 35586591, 37225015, 39387737, 39780959, 40043107, 40698477, 243 | 41419384, 44696233, 52495090, 57738081, 63439804, 66782202, 244 | 69927976, 73925736, 76809359, 79824063, 81134806, 81921250, 245 | 89785673, 94307742, 100795894, 107939439, 112330415, 246 | 114427602, 120588074, 126158721, 134416381, 137824310, 247 | 141559920, 142542975, 143853712, 145033381, 145950899, }, 248 | new int[] { 1179666, 0, 0, 0, 9699476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26280336, 249 | 0, 0, 0, 0, 0, 38076985, 0, 0, 0, 0, 0, 45220523, 52560674, 250 | 0, 0, 67175420, 69993516, 0, 0, 79889603, 0, 0, 89916763, 251 | 94373280, 101451267, 108136048, 0, 114493139, 120784689, 252 | 126355334, 134481924, 138414136, 141625457, 142608512, 0, 253 | 0, 0, }, 254 | new int[] { 0, 0, 0, 0, 9896085, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255 | 33292789, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67830786, 0, 0, 256 | 0, 80020676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127403913, 0, 0, 0, 257 | 0, 0, 0, 0, }, 258 | new int[] { 1310739, 2359332, 4653127, 0, 0, 0, 12189876, 0, 0, 0, 0, 0, 0, 259 | 0, 26345874, 28246439, 0, 31457760, 0, 35652128, 38142534, 260 | 0, 0, 0, 0, 0, 45351603, 52757283, 57869170, 63636425, 261 | 67961868, 71304237, 73991273, 0, 0, 0, 0, 90309981, 0, 262 | 101910029, 108988019, 114034355, 0, 120850228, 127469465, 263 | 135464965, 138741825, 141690994, 142739585, 143984788, 0, 264 | 0, }, 265 | new int[] { 1441813, 2424869, 4718664, 8388735, 10027160, 10879142, 12255419, 266 | 12976325, 14745825, 15401194, 15991028, 19857709, 20447544, 267 | 25035134, 26542483, 28377520, 
28705206, 31588833, 33358333, 268 | 35783201, 38208071, 39453274, 39846496, 40108644, 40764014, 269 | 41484921, 45613749, 53216038, 58196852, 63898572, 68158478, 270 | 71369793, 74253418, 77005973, 80479430, 81265879, 81986787, 271 | 90965347, 94504353, 103679508, 109250176, 114165453, 272 | 114558676, 121243445, 127731610, 135727124, 138807366, 273 | 142018675, 142805123, 144115862, 145098918, 146016436, }, 274 | new int[] { 1572887, 0, 0, 0, 10092698, 0, 12320956, 0, 14811362, 0, 0, 275 | 19923248, 0, 25166207, 26739094, 0, 0, 0, 33423870, 0, 276 | 38273608, 0, 0, 0, 0, 0, 45744825, 0, 58262393, 64095184, 277 | 68355089, 0, 75170926, 0, 80610509, 0, 0, 91817325, 0, 278 | 104203823, 109512324, 0, 0, 121636667, 128059294, 0, 279 | 139069511, 0, 0, 0, 0, 0, }, 280 | new int[] { 1703961, 2490406, 4849737, 0, 10223771, 0, 0, 13107399, 15007971, 281 | 15466732, 0, 0, 20513081, 25231745, 26870169, 0, 0, 282 | 31654371, 34275839, 0, 38404681, 0, 0, 0, 40829551, 0, 283 | 45875899, 53609261, 59900794, 64226259, 68551700, 0, 0, 0, 284 | 80807119, 81331417, 0, 91948410, 94700963, 104465975, 285 | 109643400, 114230991, 114951893, 121702209, 131663779, 0, 286 | 139266123, 0, 0, 144246936, 145295527, 0, }, 287 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27132315, 0, 0, 0, 0, 288 | 0, 0, 39518811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75302012, 0, 289 | 0, 0, 0, 92079484, 0, 105383483, 109708938, 0, 0, 0, 0, 0, 290 | 0, 0, 0, 144312474, 0, 0, }, 291 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 292 | 0, 0, 0, 0, 46006973, 0, 60031891, 64291797, 0, 0, 0, 0, 0, 293 | 0, 0, 0, 0, 105711177, 0, 0, 0, 0, 131991514, 135923736, 294 | 139331662, 0, 0, 144378011, 0, 146147509, }, 295 | new int[] { 0, 0, 0, 0, 10354845, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 296 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68813847, 0, 0, 0, 0, 0, 297 | 0, 0, 0, 0, 0, 0, 0, 121767746, 0, 0, 0, 0, 0, 0, 0, 0, }, 298 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 299 | 0, 0, 0, 0, 0, 0, 60097429, 0, 0, 0, 0, 77137048, 0, 0, 0, 300 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, 301 | new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 302 | 0, 0, 0, 0, 0, 0, 0, 64422870, 0, 0, 0, 0, 0, 0, 0, 0, 0, 303 | 0, 0, 0, 0, 0, 132122591, 0, 0, 142084216, 0, 0, 0, 0, }, }; 304 | 305 | } 306 | 307 | } 308 | -------------------------------------------------------------------------------- /HtmlParserSharp/Core/Portability.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2008-2009 Mozilla Foundation 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | using System; 24 | using HtmlParserSharp.Common; 25 | 26 | namespace HtmlParserSharp.Core 27 | { 28 | /// 29 | /// Class for C++ portability. 
/// TODO: Remove this
    ///
    public sealed class Portability
    {
        // ---- Allocation ----

        /// <summary>
        /// Creates an interned string from <paramref name="length"/> characters of
        /// <paramref name="buf"/> starting at <paramref name="offset"/>.
        /// (Original note: in C++ the refcount must be set up so that calling
        /// releaseLocal on the return value balances the refcount set here.)
        /// </summary>
        [Local]
        public static String NewLocalNameFromBuffer(char[] buf, int offset, int length)
        {
            String localName = new String(buf, offset, length);
            return string.Intern(localName);
        }

        // ---- Comparison ----

        /// <summary>
        /// Returns true when <paramref name="local"/> has exactly <paramref name="length"/>
        /// characters and each one equals the corresponding character of
        /// <paramref name="buf"/> starting at <paramref name="offset"/>.
        /// </summary>
        public static bool LocalEqualsBuffer([Local] string local, char[] buf, int offset, int length)
        {
            if (local.Length != length)
            {
                return false;
            }
            int matched = 0;
            while (matched < length && local[matched] == buf[offset + matched])
            {
                matched++;
            }
            return matched == length;
        }

        /// <summary>
        /// Returns true when <paramref name="str"/> is non-null and starts with
        /// <paramref name="lowerCaseLiteral"/>, treating 'A'-'Z' in <paramref name="str"/>
        /// as their lower-case counterparts. The literal itself is compared as given.
        /// </summary>
        public static bool LowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(string lowerCaseLiteral, string str)
        {
            return str != null
                && lowerCaseLiteral.Length <= str.Length
                && LiteralMatchesAsciiLowered(lowerCaseLiteral, str);
        }

        /// <summary>
        /// Returns true when <paramref name="str"/> is non-null, has the same length as
        /// <paramref name="lowerCaseLiteral"/> and matches it once 'A'-'Z' in
        /// <paramref name="str"/> are lower-cased.
        /// </summary>
        public static bool LowerCaseLiteralEqualsIgnoreAsciiCaseString(string lowerCaseLiteral, string str)
        {
            return str != null
                && lowerCaseLiteral.Length == str.Length
                && LiteralMatchesAsciiLowered(lowerCaseLiteral, str);
        }

        // Compares the first lowerCaseLiteral.Length characters of str against the literal,
        // lower-casing only ASCII upper-case letters of str. Callers guarantee that str is
        // at least as long as the literal.
        private static bool LiteralMatchesAsciiLowered(string lowerCaseLiteral, string str)
        {
            for (int i = 0; i < lowerCaseLiteral.Length; i++)
            {
                char actual = str[i];
                if (actual >= 'A' && actual <= 'Z')
                {
                    actual = (char)(actual + 0x20);
                }
                if (actual != lowerCaseLiteral[i])
                {
                    return false;
                }
            }
            return true;
        }
    }
}
-------------------------------------------------------------------------------- /HtmlParserSharp/Core/StackNode.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 Henri Sivonen 3 | * Copyright (c) 2007-2011 Mozilla Foundation 4 | * Copyright (c) 2012 Patrick Reisert 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | * DEALINGS IN THE SOFTWARE. 
23 | */ 24 | 25 | using System; 26 | using System.Diagnostics; 27 | using HtmlParserSharp.Common; 28 | 29 | namespace HtmlParserSharp.Core 30 | { 31 | public sealed class StackNode 32 | { 33 | readonly int flags; 34 | 35 | [Local] 36 | internal readonly string name; 37 | 38 | [Local] 39 | internal readonly string popName; 40 | 41 | [NsUri] 42 | internal readonly string ns; 43 | 44 | internal readonly T node; 45 | 46 | // Only used on the list of formatting elements 47 | internal HtmlAttributes attributes; 48 | 49 | private int refcount = 1; 50 | 51 | // [NOCPP[ 52 | 53 | private readonly TaintableLocator locator; 54 | 55 | public TaintableLocator Locator 56 | { 57 | get 58 | { 59 | return locator; 60 | } 61 | } 62 | 63 | // ]NOCPP] 64 | 65 | public int Flags 66 | { 67 | get 68 | { 69 | return flags; 70 | } 71 | } 72 | 73 | public DispatchGroup Group 74 | { 75 | get 76 | { 77 | return (DispatchGroup)(flags & ElementName.GROUP_MASK); 78 | } 79 | } 80 | 81 | public bool IsScoping 82 | { 83 | get 84 | { 85 | return (flags & ElementName.SCOPING) != 0; 86 | } 87 | } 88 | 89 | public bool IsSpecial 90 | { 91 | get 92 | { 93 | return (flags & ElementName.SPECIAL) != 0; 94 | } 95 | } 96 | 97 | public bool IsFosterParenting 98 | { 99 | get 100 | { 101 | return (flags & ElementName.FOSTER_PARENTING) != 0; 102 | } 103 | } 104 | 105 | public bool IsHtmlIntegrationPoint 106 | { 107 | get 108 | { 109 | return (flags & ElementName.HTML_INTEGRATION_POINT) != 0; 110 | } 111 | } 112 | 113 | // [NOCPP[ 114 | 115 | public bool IsOptionalEndTag 116 | { 117 | get 118 | { 119 | return (flags & ElementName.OPTIONAL_END_TAG) != 0; 120 | } 121 | } 122 | 123 | // ]NOCPP] 124 | 125 | /// 126 | /// Constructor for copying. This doesn't take another StackNode 127 | /// because in C++ the caller is reponsible for reobtaining the local names 128 | /// from another interner. 
///
        internal StackNode(int flags, [NsUri] String ns, [Local] String name, T node,
                [Local] String popName, HtmlAttributes attributes, TaintableLocator locator)
        {
            // Plain field-by-field copy of the supplied values.
            this.flags = flags;
            this.ns = ns;
            this.name = name;
            this.popName = popName;
            this.node = node;
            this.attributes = attributes;
            this.refcount = 1;
            // [NOCPP[
            this.locator = locator;
            // ]NOCPP]
        }

        /// <summary>
        /// Short hand for well-known HTML elements: both names come from
        /// <paramref name="elementName"/>, the namespace is XHTML and no attributes are kept.
        /// </summary>
        internal StackNode(ElementName elementName, T node, TaintableLocator locator)
        {
            flags = elementName.Flags;
            name = elementName.name;
            popName = elementName.name;
            ns = "http://www.w3.org/1999/xhtml";
            this.node = node;
            attributes = null;
            refcount = 1;
            Debug.Assert(!elementName.IsCustom, "Don't use this constructor for custom elements.");
            // [NOCPP[
            this.locator = locator;
            // ]NOCPP]
        }

        /// <summary>
        /// Constructor for HTML formatting elements: like the short-hand constructor,
        /// but keeps the supplied <paramref name="attributes"/>.
        /// </summary>
        internal StackNode(ElementName elementName, T node, HtmlAttributes attributes,
                TaintableLocator locator)
        {
            flags = elementName.Flags;
            name = elementName.name;
            popName = elementName.name;
            ns = "http://www.w3.org/1999/xhtml";
            this.node = node;
            this.attributes = attributes;
            refcount = 1;
            Debug.Assert(!elementName.IsCustom, "Don't use this constructor for custom elements.");
            // [NOCPP[
            this.locator = locator;
            // ]NOCPP]
        }

        ///
        /// The common-case HTML constructor.
///
        internal StackNode(ElementName elementName, T node, [Local] string popName,
                TaintableLocator locator)
        {
            // XHTML namespace, caller-supplied pop name, no attributes retained.
            flags = elementName.Flags;
            name = elementName.name;
            this.popName = popName;
            ns = "http://www.w3.org/1999/xhtml";
            this.node = node;
            attributes = null;
            refcount = 1;
            // [NOCPP[
            this.locator = locator;
            // ]NOCPP]
        }

        /// <summary>
        /// Constructor for SVG elements. Only the argument order (pop name before node)
        /// distinguishes this overload from the HTML one; the original author notes this
        /// is ugly but was the least disruptive way to avoid extra branches.
        /// </summary>
        internal StackNode(ElementName elementName, [Local] string popName, T node,
                TaintableLocator locator)
        {
            // Flags are passed through PrepareSvgFlags before being stored.
            flags = PrepareSvgFlags(elementName.Flags);
            name = elementName.name;
            this.popName = popName;
            ns = "http://www.w3.org/2000/svg";
            this.node = node;
            attributes = null;
            refcount = 1;
            // [NOCPP[
            this.locator = locator;
            // ]NOCPP]
        }

        ///
        /// Constructor for MathML.
///
        internal StackNode(ElementName elementName, T node, [Local] string popName,
                bool markAsIntegrationPoint, TaintableLocator locator)
        {
            // Flags are passed through PrepareMathFlags before being stored.
            flags = PrepareMathFlags(elementName.Flags, markAsIntegrationPoint);
            name = elementName.name;
            this.popName = popName;
            ns = "http://www.w3.org/1998/Math/MathML";
            this.node = node;
            attributes = null;
            refcount = 1;
            // [NOCPP[
            this.locator = locator;
            // ]NOCPP]
        }

        // Clears the FOSTER_PARENTING, SCOPING, SPECIAL and OPTIONAL_END_TAG bits, then,
        // if SCOPING_AS_SVG is set, turns on SCOPING, SPECIAL and HTML_INTEGRATION_POINT.
        private static int PrepareSvgFlags(int flags)
        {
            int cleared = ElementName.FOSTER_PARENTING | ElementName.SCOPING
                    | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG;
            int result = flags & ~cleared;
            if ((result & ElementName.SCOPING_AS_SVG) == 0)
            {
                return result;
            }
            return result | (ElementName.SCOPING | ElementName.SPECIAL | ElementName.HTML_INTEGRATION_POINT);
        }

        // Clears the same four bits as PrepareSvgFlags, then turns on SCOPING and SPECIAL
        // when SCOPING_AS_MATHML is set, and HTML_INTEGRATION_POINT when requested.
        private static int PrepareMathFlags(int flags, bool markAsIntegrationPoint)
        {
            int cleared = ElementName.FOSTER_PARENTING | ElementName.SCOPING
                    | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG;
            int result = flags & ~cleared;
            if ((result & ElementName.SCOPING_AS_MATHML) != 0)
            {
                result |= (ElementName.SCOPING | ElementName.SPECIAL);
            }
            if (markAsIntegrationPoint)
            {
                result |= ElementName.HTML_INTEGRATION_POINT;
            }
            return result;
        }

        /// <summary>Discards the reference to the stored attributes.</summary>
        public void DropAttributes()
        {
            attributes = null;
        }

        // [NOCPP[

        ///
        /// Returns a string that represents this instance.
        ///
        ///
        /// A string that represents this instance.
///
        public override String ToString()
        {
            return name;
        }

        // ]NOCPP]

        // TODO: probably we won't need these
        public void Retain()
        {
            refcount++;
        }

        public void Release()
        {
            refcount--;
            /*if (refcount == 0) {
                Portability.delete(this);
            }*/
        }
    }
}

--------------------------------------------------------------------------------
/HtmlParserSharp/Core/StateSnapshot.cs:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2009-2010 Mozilla Foundation
 * Copyright (c) 2012 Patrick Reisert
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

namespace HtmlParserSharp.Core
{
    /// <summary>
    /// Holder for a set of tree builder state values: both node arrays, three node
    /// pointers, two insertion modes and three boolean flags. All values are supplied
    /// to the constructor and exposed through getters with private setters.
    /// </summary>
    /// <typeparam name="T">Node handle type; must be a reference type.</typeparam>
    // The type parameter list had been lost: "where T : class" requires a declared T.
    // NOTE(review): ITreeBuilderState is assumed to be generic in T as well - confirm
    // against ITreeBuilderState.cs.
    public class StateSnapshot<T> : ITreeBuilderState<T> where T : class
    {
        /// <summary>
        /// Gets the stack.
        /// </summary>
        public StackNode<T>[] Stack { get; private set; }

        /// <summary>
        /// Gets the list of active formatting elements.
        /// </summary>
        public StackNode<T>[] ListOfActiveFormattingElements { get; private set; }

        /// <summary>Gets the form pointer.</summary>
        public T FormPointer { get; private set; }

        /// <summary>Gets the head pointer.</summary>
        public T HeadPointer { get; private set; }

        /// <summary>Gets the deep tree surrogate parent.</summary>
        public T DeepTreeSurrogateParent { get; private set; }

        /// <summary>
        /// Gets the mode.
        /// </summary>
        public InsertionMode Mode { get; private set; }

        /// <summary>
        /// Gets the original mode.
        /// </summary>
        public InsertionMode OriginalMode { get; private set; }

        /// <summary>
        /// Gets a value indicating whether this instance is frameset ok.
        /// </summary>
        /// <value>
        /// <c>true</c> if this instance is frameset ok; otherwise, <c>false</c>.
        /// </value>
        public bool IsFramesetOk { get; private set; }

        /// <summary>
        /// Gets the "need to drop LF" flag held by this instance.
        /// </summary>
        /// <value>
        /// <c>true</c> if the flag is set; otherwise, <c>false</c>.
        /// </value>
        public bool IsNeedToDropLF { get; private set; }

        /// <summary>
        /// Gets a value indicating whether this instance is quirks.
        /// </summary>
        /// <value>
        /// <c>true</c> if this instance is quirks; otherwise, <c>false</c>.
        /// </value>
        public bool IsQuirks { get; private set; }

        /// <summary>
        /// Stores every argument in the property of the corresponding name.
        /// The arrays are stored by reference, not copied.
        /// </summary>
        internal StateSnapshot(StackNode<T>[] stack,
                StackNode<T>[] listOfActiveFormattingElements,
                T formPointer,
                T headPointer,
                T deepTreeSurrogateParent,
                InsertionMode mode,
                InsertionMode originalMode,
                bool framesetOk,
                bool needToDropLF,
                bool quirks)
        {
            Stack = stack;
            ListOfActiveFormattingElements = listOfActiveFormattingElements;
            FormPointer = formPointer;
            HeadPointer = headPointer;
            DeepTreeSurrogateParent = deepTreeSurrogateParent;
            Mode = mode;
            OriginalMode = originalMode;
            IsFramesetOk = framesetOk;
            IsNeedToDropLF = needToDropLF;
            IsQuirks = quirks;
        }
    }

}

--------------------------------------------------------------------------------
/HtmlParserSharp/Core/TaintableLocator.cs:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2011 Mozilla Foundation
 * Copyright (c) 2012 Patrick Reisert
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

namespace HtmlParserSharp.Core
{
    /// <summary>
    /// A <c>Locator</c> carrying an additional one-way "tainted" flag.
    /// </summary>
    public class TaintableLocator : Locator
    {
        /// <summary>
        /// Gets a value indicating whether <see cref="MarkTainted"/> has been called.
        /// </summary>
        public bool IsTainted { get; private set; }

        /// <summary>
        /// Passes <paramref name="locator"/> to the base constructor and starts untainted.
        /// </summary>
        public TaintableLocator(ILocator locator)
            : base(locator)
        {
            IsTainted = false;
        }

        /// <summary>Sets the tainted flag; nothing in this class clears it again.</summary>
        public void MarkTainted()
        {
            IsTainted = true;
        }
    }
}

--------------------------------------------------------------------------------
/HtmlParserSharp/Core/TreeBuilderConstants.cs:
--------------------------------------------------------------------------------
/*
 * The comments following this one that use the same comment syntax as this
 * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007
 * amended as of June 28 2007.
 * That document came with this statement:
 * © Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and
 * Opera Software ASA. You are granted a license to use, reproduce and
 * create derivative works of this document."
 */

using HtmlParserSharp.Common;

namespace HtmlParserSharp.Core
{
    /// <summary>
    /// Moved the constants (and pseudo-enums) out of the TreeBuilder class.
    /// </summary>
    public class TreeBuilderConstants
    {
        ///
        /// Array version of U+FFFD.
///
        internal static readonly char[] REPLACEMENT_CHARACTER = new char[] { '\uFFFD' };

        // [NOCPP[

        // Public identifiers of the HTML 4.0 / 4.01 doctypes, in their original casing.
        internal static readonly string[] HTML4_PUBLIC_IDS = new string[]
        {
            "-//W3C//DTD HTML 4.0 Frameset//EN",
            "-//W3C//DTD HTML 4.0 Transitional//EN",
            "-//W3C//DTD HTML 4.0//EN",
            "-//W3C//DTD HTML 4.01 Frameset//EN",
            "-//W3C//DTD HTML 4.01 Transitional//EN",
            "-//W3C//DTD HTML 4.01//EN"
        };

        // ]NOCPP]

        // Lower-case public identifier strings, each ending in "//".
        // NOTE(review): presumably matched as prefixes of a lower-cased doctype public id
        // to select quirks mode - confirm against the tree builder.
        internal static readonly string[] QUIRKY_PUBLIC_IDS = new string[]
        {
            "+//silmaril//dtd html pro v0r11 19970101//",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
            "-//as//dtd html 3.0 aswedit + extensions//",
            "-//ietf//dtd html 2.0 level 1//",
            "-//ietf//dtd html 2.0 level 2//",
            "-//ietf//dtd html 2.0 strict level 1//",
            "-//ietf//dtd html 2.0 strict level 2//",
            "-//ietf//dtd html 2.0 strict//",
            "-//ietf//dtd html 2.0//",
            "-//ietf//dtd html 2.1e//",
            "-//ietf//dtd html 3.0//",
            "-//ietf//dtd html 3.2 final//",
            "-//ietf//dtd html 3.2//",
            "-//ietf//dtd html 3//",
            "-//ietf//dtd html level 0//",
            "-//ietf//dtd html level 1//",
            "-//ietf//dtd html level 2//",
            "-//ietf//dtd html level 3//",
            "-//ietf//dtd html strict level 0//",
            "-//ietf//dtd html strict level 1//",
            "-//ietf//dtd html strict level 2//",
            "-//ietf//dtd html strict level 3//",
            "-//ietf//dtd html strict//",
            "-//ietf//dtd html//",
            "-//metrius//dtd metrius presentational//",
            "-//microsoft//dtd internet explorer 2.0 html strict//",
            "-//microsoft//dtd internet explorer 2.0 html//",
            "-//microsoft//dtd internet explorer 2.0 tables//",
            "-//microsoft//dtd internet explorer 3.0 html strict//",
            "-//microsoft//dtd internet explorer 3.0 html//",
            "-//microsoft//dtd internet explorer 3.0 tables//",
            "-//netscape comm. corp.//dtd html//",
            "-//netscape comm. corp.//dtd strict html//",
            "-//o'reilly and associates//dtd html 2.0//",
            "-//o'reilly and associates//dtd html extended 1.0//",
            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
            "-//spyglass//dtd html 2.0 extended//",
            "-//sq//dtd html 2.0 hotmetal + extensions//",
            "-//sun microsystems corp.//dtd hotjava html//",
            "-//sun microsystems corp.//dtd hotjava strict html//",
            "-//w3c//dtd html 3 1995-03-24//",
            "-//w3c//dtd html 3.2 draft//",
            "-//w3c//dtd html 3.2 final//",
            "-//w3c//dtd html 3.2//",
            "-//w3c//dtd html 3.2s draft//",
            "-//w3c//dtd html 4.0 frameset//",
            "-//w3c//dtd html 4.0 transitional//",
            "-//w3c//dtd html experimental 19960712//",
            "-//w3c//dtd html experimental 970421//",
            "-//w3c//dtd w3 html//",
            "-//w3o//dtd w3 html 3.0//",
            "-//webtechs//dtd mozilla html 2.0//",
            "-//webtechs//dtd mozilla html//"
        };

        // Sentinel equal to int.MaxValue.
        internal const int NOT_FOUND_ON_STACK = int.MaxValue;

        // [NOCPP[

        [Local]
        internal const string HTML_LOCAL = "html";

        // ]NOCPP]
    }
}

--------------------------------------------------------------------------------
/HtmlParserSharp/Core/UTF16Buffer.cs:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2008-2010 Mozilla Foundation
 * Copyright (c) 2012 Patrick Reisert
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so,
subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | * DEALINGS IN THE SOFTWARE. 22 | */ 23 | 24 |
namespace HtmlParserSharp.Core
{
    /// <summary>
    /// A UTF-16 buffer that knows the start and end indices of its unconsumed
    /// content.
    /// </summary>
    public sealed class UTF16Buffer
    {
        /// <summary>
        /// Gets the backing store of the buffer. May be larger than the logical content
        /// of this UTF16Buffer.
        /// </summary>
        public char[] Buffer { get; private set; }

        /// <summary>
        /// Gets or sets the index of the first unconsumed character in the backing buffer.
        /// </summary>
        public int Start { get; set; }

        /// <summary>
        /// Gets or sets the index of the slot immediately after the last character in the backing
        /// buffer that is part of the logical content of this UTF16Buffer.
        /// </summary>
        public int End { get; set; }

        /// <summary>
        /// Constructor for wrapping an existing UTF-16 code unit array.
        /// The array is stored by reference; it is not copied.
        /// </summary>
        /// <param name="buffer">The backing buffer.</param>
        /// <param name="start">The index of the first character to consume.</param>
        /// <param name="end">The index immediately after the last character to consume.</param>
        public UTF16Buffer(char[] buffer, int start, int end)
        {
            Buffer = buffer;
            Start = start;
            End = end;
        }

        /// <summary>
        /// Determines whether this instance has data left.
        /// </summary>
        /// <value>
        /// <c>true</c> if there's data left (<see cref="Start"/> is less than <see cref="End"/>);
        /// otherwise, <c>false</c>.
        /// </value>
        public bool HasMore
        {
            get
            {
                return Start < End;
            }
        }

        /// <summary>
        /// Adjusts the start index to skip over the first character if it is a line
        /// feed and the previous character was a carriage return.
        /// Does nothing when there is no unconsumed content.
        /// </summary>
        /// <param name="lastWasCR">Whether the previous character was a carriage return.</param>
        public void Adjust(bool lastWasCR)
        {
            // Guard with HasMore: without it an exhausted buffer would either throw
            // IndexOutOfRangeException (Start == Buffer.Length) or inspect a stale
            // character that lies beyond the logical end of the content.
            if (lastWasCR && HasMore && Buffer[Start] == '\n')
            {
                Start++;
            }
        }
    }
}
-------------------------------------------------------------------------------- /HtmlParserSharp/HtmlParserSharp.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Debug 5 | AnyCPU 6 | 8.0.30703 7 | 2.0 8 | {FD150915-D34F-436A-92C1-80AA505DA754} 9 | Exe 10 | Properties 11 | HtmlParserSharp 12 | HtmlParser 13 | v4.0 14 | 512 15 | 16 | 17 | true 18 | full 19 | false 20 | bin\Debug\ 21 | DEBUG;TRACE 22 | prompt 23 | 4 24 | 25 | 26 | pdbonly 27 | true 28 | bin\Release\ 29 | TRACE 30 | prompt 31 | 4 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | Always 86 | 87 | 88 | 89 | 96 | -------------------------------------------------------------------------------- /HtmlParserSharp/Program.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Patrick Reisert 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished
to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 |
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Xml.Linq;

namespace HtmlParserSharp
{
    /// <summary>
    /// Contains a sample entry point for testing and benchmarks.
    /// </summary>
    public class Program
    {
        private static readonly SimpleHtmlParser parser = new SimpleHtmlParser();

        /// <summary>
        /// Yields the files to benchmark: currently the same sample document ten times.
        /// </summary>
        /// <returns>A lazily evaluated sequence of the files to parse.</returns>
        private static IEnumerable<FileInfo> GetTestFiles()
        {
            //DirectoryInfo dir = new DirectoryInfo("SampleData");
            //return dir.GetFiles("*.html", SearchOption.AllDirectories);
            for (int i = 0; i < 10; i++)
            {
                yield return new FileInfo(Path.Combine("SampleData", "test.html"));
            }
        }

        /// <summary>
        /// Parses every test file with <see cref="SimpleHtmlParser"/>, saves the result to
        /// "test.xml", reloads that file with <see cref="XDocument"/>, and prints timing
        /// statistics for both steps. Waits for a key press before returning.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        public static void Main(string[] args)
        {
            //var fragment1 = parser.ParseStringFragment("foo", "");
            //var fragment2 = parser.ParseStringFragment("foo", "table");

            Stopwatch sw = new Stopwatch();

            Console.Write("Parsing ... ");
            var result = GetTestFiles().Select(file =>
            {
                sw.Restart();
                var doc = parser.Parse(file.FullName);
                sw.Stop();
                var parseTime = sw.Elapsed;

                // Saving is deliberately outside both timed sections.
                doc.Save("test.xml");

                sw.Restart();
                XDocument.Load("test.xml");
                sw.Stop();
                var reparseTime = sw.Elapsed;

                return new { Time = parseTime, ReparseTime = reparseTime };
            }).ToList();

            Console.WriteLine("done.");
            Console.WriteLine("Found " + result.Count + " documents.");
            Console.WriteLine();
            PrintStatistics(result.Select(r => r.Time).ToList());

            Console.WriteLine();
            Console.WriteLine("=== Reparsing (XDocument) ===");

            // note: reparsing using XmlDocument instead gives similar results

            PrintStatistics(result.Select(r => r.ReparseTime).ToList());
            Console.ReadKey();
        }

        /// <summary>
        /// Prints total, first, average, average-without-first, minimum and maximum
        /// of the given measurements. Prints nothing for an empty list.
        /// </summary>
        /// <param name="times">The measurements, in the order they were taken.</param>
        private static void PrintStatistics(IList<TimeSpan> times)
        {
            if (times.Count == 0)
            {
                // First()/Min()/Max() would throw on an empty sequence.
                return;
            }

            TimeSpan total = times.Aggregate(TimeSpan.Zero, (sum, current) => sum + current);
            TimeSpan first = times[0];

            PrintTime("Total", total);
            PrintTime("First", first);
            PrintTime("Average", TimeSpan.FromTicks(total.Ticks / times.Count));
            if (times.Count > 1)
            {
                // Only meaningful with at least two samples; dividing by
                // (Count - 1) would otherwise be a division by zero.
                PrintTime("Average (without first)", TimeSpan.FromTicks((total.Ticks - first.Ticks) / (times.Count - 1)));
            }
            PrintTime("Min", times.Min());
            PrintTime("Max", times.Max());
        }

        /// <summary>
        /// Writes one captioned time span to the console, including its value in milliseconds.
        /// </summary>
        /// <param name="caption">The label printed before the value.</param>
        /// <param name="time">The time span to print.</param>
        private static void PrintTime(string caption, TimeSpan time)
        {
            Console.WriteLine("{0}:\n {1} ({2} ms)", caption, time.ToString(), time.TotalMilliseconds);
        }
    }
}
-------------------------------------------------------------------------------- /HtmlParserSharp/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("HtmlParser")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("HtmlParser")] 13 | [assembly: AssemblyCopyright("Copyright © 2012")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to "false" makes the types in this assembly invisible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to "true" on that type.
20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is the ID of the type library if this project is exposed to COM 23 | [assembly: Guid("dd2311df-4aa1-4f09-8fff-751cd048e652")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major version 28 | // Minor version 29 | // Build number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the build and revision numbers 33 | // by using "*": 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /HtmlParserSharp/SimpleHtmlParser.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Patrick Reisert 3 | * Copyright (c) 2005, 2006, 2007 Henri Sivonen 4 | * Copyright (c) 2007-2008 Mozilla Foundation 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software.
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | * DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 |
using System;
using System.IO;
using System.Xml;
using HtmlParserSharp.Core;

namespace HtmlParserSharp
{
    /// <summary>
    /// This is a simple API for the parsing process.
    /// Part of this is a port of the nu.validator.htmlparser.io.Driver class.
    /// The parser currently ignores the encoding in the html source and parses everything as UTF-8.
    /// </summary>
    /// <remarks>
    /// Every parse call replaces the tokenizer and tree builder held by this instance.
    /// </remarks>
    public class SimpleHtmlParser
    {
        /// <summary>Number of UTF-16 code units requested from the reader per read.</summary>
        private const int BufferSize = 2048;

        private Tokenizer tokenizer;
        private DomTreeBuilder treeBuilder;

        /// <summary>
        /// Parses a string as an HTML fragment.
        /// </summary>
        /// <param name="str">The markup to parse.</param>
        /// <param name="fragmentContext">The fragment context handed to the tree builder.</param>
        /// <returns>The document fragment produced by the tree builder.</returns>
        public XmlDocumentFragment ParseStringFragment(string str, string fragmentContext)
        {
            using (var reader = new StringReader(str))
            {
                return ParseFragment(reader, fragmentContext);
            }
        }

        /// <summary>
        /// Parses a string as a complete HTML document.
        /// </summary>
        /// <param name="str">The markup to parse.</param>
        /// <returns>The document produced by the tree builder.</returns>
        public XmlDocument ParseString(string str)
        {
            using (var reader = new StringReader(str))
            {
                return Parse(reader);
            }
        }

        /// <summary>
        /// Parses the file at the given path as a complete HTML document.
        /// The file is read through a <see cref="StreamReader"/> created with its default settings.
        /// </summary>
        /// <param name="path">The path of the file to parse.</param>
        /// <returns>The document produced by the tree builder.</returns>
        public XmlDocument Parse(string path)
        {
            using (var reader = new StreamReader(path))
            {
                return Parse(reader);
            }
        }

        /// <summary>
        /// Parses everything the reader provides as a complete HTML document.
        /// The reader is not disposed.
        /// </summary>
        /// <param name="reader">The source of the markup.</param>
        /// <returns>The document produced by the tree builder.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public XmlDocument Parse(TextReader reader)
        {
            Reset();
            Tokenize(reader);
            return treeBuilder.Document;
        }

        /// <summary>
        /// Parses everything the reader provides as an HTML fragment.
        /// The reader is not disposed.
        /// </summary>
        /// <param name="reader">The source of the markup.</param>
        /// <param name="fragmentContext">The fragment context handed to the tree builder.</param>
        /// <returns>The document fragment produced by the tree builder.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public XmlDocumentFragment ParseFragment(TextReader reader, string fragmentContext)
        {
            Reset();
            treeBuilder.SetFragmentContext(fragmentContext);
            Tokenize(reader);
            return treeBuilder.getDocumentFragment();
        }

        /// <summary>
        /// Creates a fresh tree builder and tokenizer for the next parse; comments are kept.
        /// </summary>
        private void Reset()
        {
            treeBuilder = new DomTreeBuilder();
            tokenizer = new Tokenizer(treeBuilder, false);
            treeBuilder.IsIgnoringComments = false;

            // optionally: report errors and more

            //treeBuilder.ErrorEvent +=
            //    (sender, a) =>
            //    {
            //        ILocator loc = tokenizer as ILocator;
            //        Console.WriteLine("{0}: {1} (Line: {2})", a.IsWarning ? "Warning" : "Error", a.Message, loc.LineNumber);
            //    };
            //treeBuilder.DocumentModeDetected += (sender, a) => Console.WriteLine("Document mode: " + a.Mode.ToString());
            //tokenizer.EncodingDeclared += (sender, a) => Console.WriteLine("Encoding: " + a.Encoding + " (currently ignored)");
        }

        /// <summary>
        /// Reads the reader to its end in chunks of <see cref="BufferSize"/> characters and
        /// feeds them to the tokenizer. A byte order mark at the very start of the input is
        /// skipped. <c>tokenizer.End()</c> is always called, even if tokenizing throws.
        /// </summary>
        /// <param name="reader">The source of the markup.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        private void Tokenize(TextReader reader)
        {
            if (reader == null)
            {
                // The single-string constructor takes the parameter name, not a message.
                throw new ArgumentNullException("reader");
            }

            tokenizer.Start();

            try
            {
                char[] buffer = new char[BufferSize];
                UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0);
                bool lastWasCR = false;
                bool atStreamStart = true;
                int streamOffset = 0;
                int len;

                // TextReader.Read returns 0 once no more characters are left.
                while ((len = reader.Read(buffer, 0, buffer.Length)) != 0)
                {
                    int offset = 0;
                    int length = len;

                    if (atStreamStart && buffer[0] == '\uFEFF')
                    {
                        // Swallow the BOM; the base offset of -1 keeps the first
                        // real character at position 0.
                        streamOffset = -1;
                        offset = 1;
                        length--;
                    }

                    if (length > 0)
                    {
                        tokenizer.SetTransitionBaseOffset(streamOffset);
                        bufr.Start = offset;
                        bufr.End = offset + length;
                        lastWasCR = TokenizeChunk(bufr, lastWasCR);
                    }

                    // After the first chunk the offset is the number of characters
                    // consumed so far (BOM excluded); afterwards it simply accumulates.
                    streamOffset = atStreamStart ? length : streamOffset + len;
                    atStreamStart = false;
                }

                tokenizer.Eof();
            }
            finally
            {
                tokenizer.End();
            }
        }

        /// <summary>
        /// Feeds one filled buffer to the tokenizer until it is fully consumed.
        /// </summary>
        /// <param name="chunk">The buffer whose unconsumed content is tokenized.</param>
        /// <param name="lastWasCR">Whether the character before this chunk was a carriage return.</param>
        /// <returns>The value returned by the last <c>TokenizeBuffer</c> call, or false if the
        /// last pass did not call it.</returns>
        private bool TokenizeChunk(UTF16Buffer chunk, bool lastWasCR)
        {
            while (chunk.HasMore)
            {
                chunk.Adjust(lastWasCR);
                lastWasCR = false;
                if (chunk.HasMore)
                {
                    lastWasCR = tokenizer.TokenizeBuffer(chunk);
                }
            }

            return lastWasCR;
        }
    }
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HtmlParserSharp 2 | =============== 3 | 4 | This is a manual C# port of the [Validator.nu HTML Parser](http://about.validator.nu/htmlparser/), a HTML5 parser originally written in Java and (compiled to C++ using the Google Web Toolkit) used by Mozilla's Gecko rendering engine. The port uses the DOM implemented in [System.Xml](http://msdn.microsoft.com/en-us/library/system.xml.aspx). 5 | 6 | Status 7 | ------ 8 | PLEASE SEE https://github.com/jamietre/HtmlParserSharp FOR AN ACTIVELY MAINTAINED VERSION OF THIS PROJECT. 9 | 10 | Currently the port is based on Validator.nu 1.3.1 and works, as far as I have tested it. However as there are no unit tests, I'm not sure if every detail is working correctly. Tests showed that it is quite fast (about 3-6 times slower than parsing XML using .NET's XDocument API, but I think XML parsing is easier to implement, so this is okay and it's still FAST). 11 | 12 | What's missing 13 | -------------- 14 | If you want to contribute, maybe you can start here: 15 | 16 | * Support for character encodings other than UTF-8 17 | * More C#-ish coding style 18 | * Unit tests 19 | * Look for TODOs in the code 20 | --------------------------------------------------------------------------------