├── Test
├── Test References
│ └── NSoup.accessor
├── htmltests
│ ├── xml-test.xml
│ ├── meta-charset-3.html
│ ├── thumb.jpg
│ ├── baidu-cn-home.html
│ ├── baidu-variant.html
│ ├── meta-charset-1.html
│ ├── meta-charset-2.html
│ └── README
├── NSoupTest.snk
├── TextUtil.cs
├── Properties
│ └── AssemblyInfo.cs
├── Select
│ └── QueryParserTest.cs
├── Integration
│ ├── Benchmark.cs
│ └── ParseTest.cs
├── Parser
│ ├── TagTest.cs
│ ├── XmlTreeBuilderTest.cs
│ ├── TokenQueueTests.cs
│ └── AttributeParseTest.cs
├── Helper
│ ├── DataUtilTest.cs
│ ├── StringUtilTest.cs
│ └── HttpConnectionTest.cs
├── Nodes
│ ├── TextNodeTest.cs
│ ├── EntitiesTest.cs
│ └── DocumentTest.cs
└── Test.csproj
├── NSoup.suo
├── NSoup.v11.suo
├── NSoup
├── NSoup.snk
├── NSoup.csproj.user
├── HttpStatusException.cs
├── UnsupportedMimeTypeException.cs
├── Parse
│ ├── ParseErrorList.cs
│ ├── ParseError.cs
│ ├── TreeBuilder.cs
│ ├── XmlTreeBuilder.cs
│ ├── Parser.cs
│ └── Token.cs
├── Select
│ ├── NodeVisitor.cs
│ ├── NodeTraversor.cs
│ ├── Collector.cs
│ ├── CombiningEvaluator.cs
│ ├── StructuralEvaluator.cs
│ └── Selector.cs
├── Nodes
│ ├── entities-base.txt
│ ├── Comment.cs
│ ├── DocumentType.cs
│ ├── XmlDeclaration.cs
│ ├── DataNode.cs
│ ├── Attribute.cs
│ ├── TextNode.cs
│ └── Entities.cs
├── Properties
│ └── AssemblyInfo.cs
├── Helper
│ ├── DescendableLinkedList.cs
│ ├── StringUtil.cs
│ ├── DataUtil.cs
│ └── LinkedHashSet.cs
├── NSoup.csproj
└── Safety
│ └── Cleaner.cs
├── LocalTestRun.testrunconfig
├── README.md
├── LICENSE
├── .gitignore
├── NSoup.sln
└── NSoup.vsmdi
/Test/Test References/NSoup.accessor:
--------------------------------------------------------------------------------
1 | NSoup.dll
2 | Desktop
3 |
--------------------------------------------------------------------------------
/NSoup.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GeReV/NSoup/HEAD/NSoup.suo
--------------------------------------------------------------------------------
/NSoup.v11.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GeReV/NSoup/HEAD/NSoup.v11.suo
--------------------------------------------------------------------------------
/NSoup/NSoup.snk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GeReV/NSoup/HEAD/NSoup/NSoup.snk
--------------------------------------------------------------------------------
/Test/htmltests/xml-test.xml:
--------------------------------------------------------------------------------
1 |
<!DOCTYPE> node.
11 | /// One two & three &
"); 87 | Element p = doc.Select("p").First; 88 | 89 | Element span = doc.Select("span").First; 90 | Assert.AreEqual("two &", span.Text()); 91 | TextNode spanText = (TextNode)span.ChildNodes[0]; 92 | Assert.AreEqual("two &", spanText.Text()); 93 | 94 | TextNode tn = (TextNode)p.ChildNodes[2]; 95 | Assert.AreEqual(" three &", tn.Text()); 96 | 97 | tn.Text(" POW!"); 98 | Assert.AreEqual("One two & POW!", TextUtil.StripNewLines(p.Html())); 99 | 100 | tn.Attr("text", "kablam &"); 101 | Assert.AreEqual("kablam &", tn.Text()); 102 | Assert.AreEqual("One two &kablam &", TextUtil.StripNewLines(p.Html())); 103 | } 104 | 105 | [TestMethod] 106 | public void testSplitText() 107 | { 108 | Document doc = NSoup.NSoupClient.Parse("¹²³¼½¾
"; 119 | Document doc = NSoupClient.Parse(html); 120 | Element p = doc.Select("p").First; 121 | Assert.AreEqual("¹²³¼½¾", p.Html()); 122 | Assert.AreEqual("¹²³¼½¾", p.Text()); 123 | } 124 | 125 | [TestMethod] 126 | public void noSpuriousDecodes() 127 | { 128 | string s = "http://www.foo.com?a=1&num_rooms=1&children=0&int=VA&b=2"; 129 | Assert.AreEqual(s, Entities.Unescape(s)); 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /NSoup/NSoup.csproj: -------------------------------------------------------------------------------- 1 | 2 |href="index.html"
105 | /// Hello
"); 72 | doc.Text("Replaced"); 73 | Assert.AreEqual("Replaced", doc.Text()); 74 | Assert.AreEqual("Replaced", doc.Body.Text()); 75 | Assert.AreEqual(1, doc.Select("head").Count); 76 | } 77 | 78 | [TestMethod] 79 | public void testTitles() 80 | { 81 | Document noTitle = NSoup.NSoupClient.Parse("Hello
"); 82 | Document withTitle = NSoup.NSoupClient.Parse("Hello
"); 83 | 84 | Assert.AreEqual("", noTitle.Title); 85 | noTitle.Title = "Hello"; 86 | Assert.AreEqual("Hello", noTitle.Title); 87 | Assert.AreEqual("Hello", noTitle.Select("title").First.Text()); 88 | 89 | Assert.AreEqual("First", withTitle.Title); 90 | withTitle.Title = "Hello"; 91 | Assert.AreEqual("Hello", withTitle.Title); 92 | Assert.AreEqual("Hello", withTitle.Select("title").First.Text()); 93 | 94 | Document normaliseTitle = NSoupClient.Parse("π & < >
"); 102 | // default is utf-8 103 | Assert.AreEqual("π & < >
", doc.Body.Html()); 104 | Assert.AreEqual("UTF-8", doc.OutputSettings().Encoding.WebName.ToUpperInvariant()); 105 | 106 | doc.OutputSettings().SetEncoding("ascii"); 107 | Assert.AreEqual(Entities.EscapeMode.Base, doc.OutputSettings().EscapeMode); 108 | Assert.AreEqual("π & < >
", doc.Body.Html()); 109 | 110 | doc.OutputSettings().SetEscapeMode(Entities.EscapeMode.Extended); 111 | Assert.AreEqual("π & < >
", doc.Body.Html()); 112 | } 113 | 114 | [TestMethod] 115 | public void testXhtmlReferences() 116 | { 117 | Document doc = NSoupClient.Parse("< > & " ' ×"); 118 | doc.OutputSettings().SetEscapeMode(Entities.EscapeMode.Xhtml); 119 | Assert.AreEqual("< > & " ' ×", doc.Body.Html()); 120 | } 121 | 122 | [TestMethod] 123 | public void testNormalisesStructure() 124 | { 125 | Document doc = NSoupClient.Parse("three
four
"); 126 | Assert.AreEqual("two
three
four
", TextUtil.StripNewLines(doc.Html())); 127 | } 128 | 129 | [TestMethod] 130 | public void testClone() 131 | { 132 | Document doc = NSoupClient.Parse("One
Two"); 133 | Document clone = (Document)doc.Clone(); 134 | 135 | Assert.AreEqual("
One
Two
", TextUtil.StripNewLines(clone.Html())); 136 | clone.Title = "Hello there"; 137 | clone.Select("p").First.Text("One more").Attr("id", "1"); 138 | Assert.AreEqual("One more
Two
", TextUtil.StripNewLines(clone.Html())); 139 | Assert.AreEqual("One
Two
", TextUtil.StripNewLines(doc.Html())); 140 | } 141 | 142 | [TestMethod] 143 | public void testClonesDeclarations() 144 | { 145 | Document doc = NSoupClient.Parse("Clean methods in body are used.
41 | /// body of a Document.
111 | /// | Pattern | Matches | Example | |
|---|---|---|---|
* | any element | * | |
tag | elements with the given tag name | div | |
ns|E | elements of type E in the namespace ns | fb|name finds <fb:name> elements | |
#id | elements with attribute ID of "id" | div#wrap, #logo | |
.class | elements with a class name of "class" | div.left, .result | |
[attr] | elements with an attribute named "attr" (with any value) | a[href], [title] | |
[^attrPrefix] | elements with an attribute name starting with "attrPrefix". Use to find elements with HTML5 datasets | [^data-], div[^data-] | |
[attr=val] | elements with an attribute named "attr", and value equal to "val" | img[width=500], a[rel=nofollow] | |
[attr^=valPrefix] | elements with an attribute named "attr", and value starting with "valPrefix" | a[href^=http:] | |
[attr$=valSuffix] | elements with an attribute named "attr", and value ending with "valSuffix" | img[src$=.png] | |
[attr*=valContaining] | elements with an attribute named "attr", and value containing "valContaining" | a[href*=/search/] | |
[attr~=regex] | elements with an attribute named "attr", and value matching the regular expression | img[src~=(?i)\\.(png|jpe?g)] | |
| The above may be combined in any order | div.header[title] | ||
Combinators | |||
E F | an F element descended from an E element | div a, .logo h1 | |
E > F | an F direct child of E | ol > li | |
E + F | an F element immediately preceded by sibling E | li + li, div.head + div | |
E ~ F | an F element preceded by sibling E | h1 ~ p | |
E, F, G | all matching elements E, F, or G | a[href], div, h3 | |
Pseudo selectors | |||
:lt(n) | elements whose sibling index is less than n | td:lt(3) finds the first 3 cells of each row | |
:gt(n) | elements whose sibling index is greater than n | td:gt(1) finds cells after skipping the first two | |
:eq(n) | elements whose sibling index is equal to n | td:eq(0) finds the first cell of each row | |
:has(selector) | elements that contain at least one element matching the selector | div:has(p) finds divs that contain p elements | |
:not(selector) | elements that do not match the selector. See also {@link Elements#not(String)} | div:not(.logo) finds all divs that do not have the "logo" class. div:not(:has(div)) finds divs that do not contain divs. | |
:contains(text) | elements that contain the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants. | p:contains(jsoup) finds p elements containing the text "jsoup". | |
:matches(regex) | elements whose text matches the specified regular expression. The text may appear in the found element, or any of its descendants. | td:matches(\\d+) finds table cells containing digits. div:matches((?i)login) finds divs containing the text, case insensitively. | |
:containsOwn(text) | elements that directly contain the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants. | p:containsOwn(jsoup) finds p elements with own text "jsoup". | |
:matchesOwn(regex) | elements whose own text matches the specified regular expression. The text must appear in the found element, not any of its descendants. | td:matchesOwn(\\d+) finds table cells directly containing digits. div:matchesOwn((?i)login) finds divs containing the text, case insensitively. | |
| The above may be combined in any order and with other selectors | .light:contains(name):eq(0) | ||