├── .editorconfig
├── .gitattributes
├── .gitignore
├── .nuget
├── NuGet.Config
├── NuGet.exe
└── NuGet.targets
├── Assets
├── github-header.png
└── readsharp.png
├── CHANGELOG.md
├── LICENSE-MIT
├── PortablePorts
├── NReadability
│ ├── AttributeTransformationInput.cs
│ ├── AttributeTransformationResult.cs
│ ├── ChildNodesTraverser.cs
│ ├── Consts.cs
│ ├── DomExtensions.cs
│ ├── DomSerializationParams.cs
│ ├── ElementsTraverser.cs
│ ├── EncodedStringWriter.cs
│ ├── EnumerableExtensions.cs
│ ├── Enums.cs
│ ├── HtmlUtils.cs
│ ├── InternalErrorException.cs
│ ├── MetaExtractor.cs
│ ├── NReadability.csproj
│ ├── NReadabilityTranscoder.cs
│ ├── Properties
│ │ └── AssemblyInfo.cs
│ ├── Resources
│ │ └── readability.css
│ ├── SgmlDomBuilder.cs
│ ├── SgmlDomSerializer.cs
│ ├── TranscodingInput.cs
│ ├── TranscodingResult.cs
│ ├── UtilityExtensions.cs
│ ├── WebTranscodingInput.cs
│ └── WebTranscodingResult.cs
└── SgmlReader
│ ├── HTMLspecial.ent
│ ├── HTMLsymbol.ent
│ ├── Html.dtd
│ ├── Properties
│ └── AssemblyInfo.cs
│ ├── SgmlParser.cs
│ ├── SgmlReader.cs
│ ├── SgmlReader.csproj
│ └── htmllat1.ent
├── README.md
├── ReadSharp.Tests
├── Properties
│ └── AssemblyInfo.cs
├── ReadSharp.Tests.csproj
├── ReadTests.cs
├── TestsBase.cs
├── app.config
└── packages.config
├── ReadSharp.sln
└── ReadSharp
├── Encodings
├── Encoder.cs
├── Iso88591.cs
├── Iso885913.cs
├── Iso885915.cs
├── Iso88592.cs
├── Iso88593.cs
├── Iso88594.cs
├── Iso88595.cs
├── Iso88596.cs
├── Iso88597.cs
├── Iso88598.cs
├── Iso88599.cs
├── Windows1250.cs
├── Windows1251.cs
├── Windows1252.cs
├── Windows1253.cs
├── Windows1254.cs
├── Windows1255.cs
├── Windows1256.cs
├── Windows1257.cs
└── Windows1258.cs
├── HtmlUtilities.cs
├── IReader.cs
├── Models
├── Article.cs
├── ArticleImage.cs
├── HttpOptions.cs
├── ReadOptions.cs
└── Response.cs
├── Properties
└── AssemblyInfo.cs
├── ReadException.cs
├── ReadSharp.csproj
├── ReadSharp.nuspec
├── Reader.cs
└── packages.config
/.editorconfig:
--------------------------------------------------------------------------------
1 | ; This file is for unifying the coding style for different editors and IDEs
2 | ; editorconfig.org
3 |
4 | root = true
5 |
6 | [*]
7 | end_of_line = crlf
8 | charset = utf-8
9 | trim_trailing_whitespace = true
10 | insert_final_newline = true
11 | indent_style = space
12 | indent_size = 2
13 |
14 | [*.md]
15 | trim_trailing_whitespace = false
16 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text
3 |
4 | # Don't check these into the repo as LF to work around TeamCity bug
5 | *.xml -text
6 | *.targets -text
7 |
8 | # Custom for Visual Studio
9 | *.cs diff=csharp
10 | *.sln merge=union
11 | *.csproj merge=union
12 | *.vbproj merge=union
13 | *.fsproj merge=union
14 | *.dbproj merge=union
15 |
16 | # Denote all files that are truly binary and should not be modified.
17 | *.dll binary
18 | *.exe binary
19 | *.png binary
20 | *.ico binary
21 | *.snk binary
22 | *.pdb binary
23 | *.svg binary
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and files generated by popular Visual Studio add-ons.
2 |
3 | # User-specific files
4 | *.suo
5 | *.user
6 | *.sln.docstates
7 |
8 | # Build results
9 |
10 | [Dd]ebug/
11 | [Rr]elease/
12 | x64/
13 | build/
14 | [Bb]in/
15 | [Oo]bj/
16 |
17 | # configuration
18 | Web.Release.config
19 | Web.Debug.config
20 |
21 | # MSTest test Results
22 | [Tt]est[Rr]esult*/
23 | [Bb]uild[Ll]og.*
24 |
25 | *_i.c
26 | *_p.c
27 | *.ilk
28 | *.meta
29 | *.obj
30 | *.pch
31 | *.pdb
32 | *.pgc
33 | *.pgd
34 | *.rsp
35 | *.sbr
36 | *.tlb
37 | *.tli
38 | *.tlh
39 | *.tmp
40 | *.tmp_proj
41 | *.log
42 | *.vspscc
43 | *.vssscc
44 | .builds
45 | *.pidb
46 | *.log
47 | *.scc
48 |
49 | # Visual C++ cache files
50 | ipch/
51 | *.aps
52 | *.ncb
53 | *.opensdf
54 | *.sdf
55 | *.cachefile
56 |
57 | # Visual Studio profiler
58 | *.psess
59 | *.vsp
60 | *.vspx
61 |
62 | # Guidance Automation Toolkit
63 | *.gpState
64 |
65 | # ReSharper is a .NET coding add-in
66 | _ReSharper*/
67 | *.[Rr]e[Ss]harper
68 |
69 | # TeamCity is a build add-in
70 | _TeamCity*
71 |
72 | # DotCover is a Code Coverage Tool
73 | *.dotCover
74 |
75 | # NCrunch
76 | *.ncrunch*
77 | .*crunch*.local.xml
78 |
79 | # Installshield output folder
80 | [Ee]xpress/
81 |
82 | # DocProject is a documentation generator add-in
83 | DocProject/buildhelp/
84 | DocProject/Help/*.HxT
85 | DocProject/Help/*.HxC
86 | DocProject/Help/*.hhc
87 | DocProject/Help/*.hhk
88 | DocProject/Help/*.hhp
89 | DocProject/Help/Html2
90 | DocProject/Help/html
91 |
92 | # Click-Once directory
93 | publish/
94 |
95 | # Publish Web Output
96 | *.Publish.xml
97 | *.pubxml
98 |
99 | # NuGet Packages Directory
100 | ## NuGet Package Restore is in use (see .nuget/NuGet.targets), so restored packages are not committed
101 | packages/
102 | *.nupkg
103 |
104 | # Windows Azure Build Output
105 | csx
106 | *.build.csdef
107 |
108 | # Windows Store app package directory
109 | AppPackages/
110 |
111 | # Others
112 | sql/
113 | *.Cache
114 | ClientBin/
115 | [Ss]tyle[Cc]op.*
116 | ~$*
117 | *~
118 | *.dbmdl
119 | *.[Pp]ublish.xml
120 | *.pfx
121 | *.publishsettings
122 | .sass-cache
123 | node_modules
124 | *.[L|l]og
125 | tmp
126 | _old
127 | _tmp
128 | Gemfile.lock
129 | WebWorkbench.mswwsettings
130 |
131 | # RIA/Silverlight projects
132 | Generated_Code/
133 |
134 | # Backup & report files from converting an old project file to a newer
135 | # Visual Studio version. Backup files are not needed, because we have git ;-)
136 | _UpgradeReport_Files/
137 | Backup*/
138 | UpgradeLog*.XML
139 | UpgradeLog*.htm
140 |
141 | # SQL Server files
142 | App_Data/*.mdf
143 | App_Data/*.ldf
144 |
145 | # =========================
146 | # Windows detritus
147 | # =========================
148 |
149 | # Windows image file caches
150 | Thumbs.db
151 | ehthumbs.db
152 |
153 | # Folder config file
154 | Desktop.ini
155 |
156 | # Recycle Bin used on file shares
157 | $RECYCLE.BIN/
158 |
159 | # macOS Finder metadata
160 | .DS_Store
161 |
162 |
163 | # =========================
164 | # Project
165 | # =========================
166 |
--------------------------------------------------------------------------------
/.nuget/NuGet.Config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.nuget/NuGet.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceee/ReadSharp/9d49aa9a8c2f6f4db60348ba5fd5692f51cd439b/.nuget/NuGet.exe
--------------------------------------------------------------------------------
/.nuget/NuGet.targets:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | $(MSBuildProjectDirectory)\..\
5 |
6 |
7 | false
8 |
9 |
10 | false
11 |
12 |
13 | true
14 |
15 |
16 | false
17 |
18 |
19 |
20 |
21 |
22 |
26 |
27 |
28 |
29 |
30 | $([System.IO.Path]::Combine($(SolutionDir), ".nuget"))
31 | $([System.IO.Path]::Combine($(ProjectDir), "packages.config"))
32 |
33 |
34 |
35 |
36 | $(SolutionDir).nuget
37 | packages.config
38 |
39 |
40 |
41 |
42 | $(NuGetToolsPath)\NuGet.exe
43 | @(PackageSource)
44 |
45 | "$(NuGetExePath)"
46 | mono --runtime=v4.0.30319 $(NuGetExePath)
47 |
48 | $(TargetDir.Trim('\\'))
49 |
50 | -RequireConsent
51 | -NonInteractive
52 |
53 | "$(SolutionDir) "
54 | "$(SolutionDir)"
55 |
56 |
57 | $(NuGetCommand) install "$(PackagesConfig)" -source "$(PackageSources)" $(NonInteractiveSwitch) $(RequireConsentSwitch) -solutionDir $(PaddedSolutionDir)
58 | $(NuGetCommand) pack "$(ProjectPath)" -Properties "Configuration=$(Configuration);Platform=$(Platform)" $(NonInteractiveSwitch) -OutputDirectory "$(PackageOutputDir)" -symbols
59 |
60 |
61 |
62 | RestorePackages;
63 | $(BuildDependsOn);
64 |
65 |
66 |
67 |
68 | $(BuildDependsOn);
69 | BuildPackage;
70 |
71 |
72 |
73 |
74 |
75 |
76 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
91 |
92 |
95 |
96 |
97 |
98 |
100 |
101 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/Assets/github-header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceee/ReadSharp/9d49aa9a8c2f6f4db60348ba5fd5692f51cd439b/Assets/github-header.png
--------------------------------------------------------------------------------
/Assets/readsharp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceee/ReadSharp/9d49aa9a8c2f6f4db60348ba5fd5692f51cd439b/Assets/readsharp.png
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ### 6.2.2
2 |
3 | - Option (`PreferHTMLEncoding`) to either prefer HTML or HTTP encoding for generating content
4 |
5 | ### 6.2.0
6 |
7 | - Option to replace images with placeholders
8 |
9 | ### 6.1.0
10 |
11 | - Add RAW HTML content to Article
12 |
13 | ### 6.0.0
14 |
15 | - Support for Universal apps (dropped SL and WP7 support)
16 |
17 | ### 5.0.0
18 |
19 | - HttpOptions for better control over the request
20 | - More reliable scraping of images
21 | - Remove unnecessary attributes from tags
22 | - Allow parsing of multi-page articles
23 |
24 | ### 4.2.3
25 |
26 | - add PrettyPrint option
27 |
28 | ### 4.2.2
29 |
30 | - use encoding found in HTTP headers in first iteration (fixes [issue #6](https://github.com/ceee/ReadSharp/issues/6))
31 |
32 | ### 4.2.1
33 |
34 | - fixes [issue #3](https://github.com/ceee/ReadSharp/issues/3)
35 |
36 | ### 4.2.0
37 |
38 | - use custom encoders if not supported on platform (implemented for ISO-8859 and Windows range).
39 |
40 | ### 4.1.0
41 |
42 | - extract description, favicon and front image from meta tags
43 | - correct encoding: retry reading the stream with the charset declared in the HTML document if no charset is given in the HTTP headers or the two do not match (fixes #1)
44 |
45 | ### 4.0.0
46 |
47 | - migrate PocketSharp.Reader to ReadSharp
48 |
--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 cee, Tobias Klika
2 |
3 | Permission is hereby granted, free of charge, to any person
4 | obtaining a copy of this software and associated documentation
5 | files (the "Software"), to deal in the Software without
6 | restriction, including without limitation the rights to use,
7 | copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the
9 | Software is furnished to do so, subject to the following
10 | conditions:
11 |
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/AttributeTransformationInput.cs:
--------------------------------------------------------------------------------
1 | using System.Xml.Linq;
2 |
3 | namespace ReadSharp.Ports.NReadability
4 | {
5 | public class AttributeTransformationInput // Plain data holder describing one attribute that is about to be transformed.
6 | {
7 | public string AttributeValue { get; set; } // presumably the current (untransformed) value of the attribute — TODO confirm at the call site
8 |
9 | public string ArticleUrl { get; set; } // presumably the URL of the article the element was read from — TODO confirm at the call site
10 |
11 | public XElement Element { get; set; } // presumably the element that owns the attribute — TODO confirm at the call site
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/AttributeTransformationResult.cs:
--------------------------------------------------------------------------------
1 | namespace ReadSharp.Ports.NReadability
2 | {
3 | public class AttributeTransformationResult // Plain data holder for the outcome of an attribute transformation.
4 | {
5 | /// <summary>
6 | /// Result of the transformation.
7 | /// </summary>
8 | public string TransformedValue { get; set; }
9 |
10 | /// <summary>
11 | /// Name of the attribute that will be used to store the original value. Can be null.
12 | /// </summary>
13 | public string OriginalValueAttributeName { get; set; }
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/ChildNodesTraverser.cs:
--------------------------------------------------------------------------------
1 | /*
2 | * NReadability
3 | * http://code.google.com/p/nreadability/
4 | *
5 | * Copyright 2010 Marek Stój
6 | * http://immortal.pl/
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | using System;
22 | using System.Xml.Linq;
23 |
24 | namespace ReadSharp.Ports.NReadability
25 | {
/// <summary>
/// Invokes a visitor once for every direct child node of a container, in document order.
/// </summary>
internal class ChildNodesTraverser
{
  private readonly Action<XNode> _childNodeVisitor;

  #region Constructor(s)

  /// <summary>
  /// Creates a traverser that calls <paramref name="childNodeVisitor"/> for each child node.
  /// </summary>
  /// <param name="childNodeVisitor">Callback that receives each child node.</param>
  /// <exception cref="ArgumentNullException"><paramref name="childNodeVisitor"/> is null.</exception>
  public ChildNodesTraverser(Action<XNode> childNodeVisitor)
  {
    if (childNodeVisitor == null)
    {
      throw new ArgumentNullException("childNodeVisitor");
    }

    _childNodeVisitor = childNodeVisitor;
  }

  #endregion

  #region Public methods

  /// <summary>
  /// Visits the direct children of <paramref name="node"/>; descendants further down are not visited.
  /// </summary>
  /// <param name="node">The node whose children are visited. Must be an <see cref="XContainer"/>.</param>
  /// <exception cref="ArgumentException"><paramref name="node"/> is null or is not an <see cref="XContainer"/>.</exception>
  public void Traverse(XNode node)
  {
    var container = node as XContainer;

    if (container == null)
    {
      throw new ArgumentException("The node must be an XContainer in order to traverse its children.");
    }

    var childNode = container.FirstNode;

    while (childNode != null)
    {
      // Read the successor before calling the visitor, so the walk does not depend on
      // what the visitor does to the node it is handed (e.g. detaching it from the tree).
      var nextChildNode = childNode.NextNode;

      _childNodeVisitor(childNode);

      childNode = nextChildNode;
    }
  }

  #endregion
}
67 | }
68 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/Consts.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 |
3 | namespace ReadSharp.Ports.NReadability
4 | {
/// <summary>
/// Library-wide constant values.
/// </summary>
public static class Consts
{
  #region Constructor(s)

  static Consts()
  {
    // Built once, when the type is initialized, from the assembly that defines this class.
    var definingAssembly = typeof(Consts).GetTypeInfo().Assembly;

    NReadabilityFullName = string.Format("NReadability {0}", definingAssembly.FullName);
  }

  #endregion

  #region Properties

  /// <summary>
  /// The text "NReadability " followed by the full name of the assembly that contains this class.
  /// </summary>
  public static string NReadabilityFullName { get; private set; }

  #endregion
}
28 | }
29 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/DomExtensions.cs:
--------------------------------------------------------------------------------
1 | /*
2 | * NReadability
3 | * http://code.google.com/p/nreadability/
4 | *
5 | * Copyright 2010 Marek Stój
6 | * http://immortal.pl/
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | using System;
22 | using System.Collections.Generic;
23 | using System.Linq;
24 | using System.Text;
25 | using System.Text.RegularExpressions;
26 | using System.Xml.Linq;
27 |
28 | namespace ReadSharp.Ports.NReadability
29 | {
/// <summary>
/// Extension methods that give LINQ to XML documents, elements and containers a small
/// HTML-DOM-like surface: body/title lookup, id/class/style accessors, inner HTML, and
/// lookups by id, tag name or class.
/// </summary>
public static class DomExtensions
{
  // filters control characters but allows only properly-formed surrogate sequences
  // NOTE(review): the pattern literal was incomplete in this copy of the file and has been
  // reconstructed; confirm it against the upstream NReadability source.
  private static readonly Regex _invalidXMLChars = new Regex(
    @"(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F\uFEFF\uFFFE\uFFFF]");

  /// <summary>
  /// removes any unusual unicode characters that can't be encoded into XML
  /// </summary>
  /// <param name="text">Text to clean. May be null.</param>
  /// <returns>The cleaned text, or an empty string when <paramref name="text"/> is null or empty.</returns>
  public static string RemoveInvalidXMLChars(string text)
  {
    if (string.IsNullOrEmpty(text))
    {
      return "";
    }

    return _invalidXMLChars.Replace(text, "");
  }

  #region XDocument extensions

  /// <summary>
  /// Returns the first descendant of the document root named "body" (case-insensitive).
  /// </summary>
  /// <returns>The body element, or null when the document has no root or no body element.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
  public static XElement GetBody(this XDocument document)
  {
    if (document == null)
    {
      throw new ArgumentNullException("document");
    }

    var documentRoot = document.Root;

    if (documentRoot == null)
    {
      return null;
    }

    return documentRoot.GetElementsByTagName("body").FirstOrDefault();
  }

  /// <summary>
  /// Returns the trimmed text of the "title" element that is a direct child of the first "head" element.
  /// </summary>
  /// <returns>
  /// null when the document has no root; an empty string when there is no head or no title
  /// element; otherwise the trimmed title text.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
  public static string GetTitle(this XDocument document)
  {
    if (document == null)
    {
      throw new ArgumentNullException("document");
    }

    var documentRoot = document.Root;

    if (documentRoot == null)
    {
      return null;
    }

    var headElement = documentRoot.GetElementsByTagName("head").FirstOrDefault();

    if (headElement == null)
    {
      return "";
    }

    var titleElement = headElement.GetChildrenByTagName("title").FirstOrDefault();

    if (titleElement == null)
    {
      return "";
    }

    return (titleElement.Value ?? "").Trim();
  }

  /// <summary>
  /// Returns the first element, in document order, whose "id" attribute equals <paramref name="id"/>.
  /// </summary>
  /// <remarks>
  /// Documents that contain the same id more than once yield the first match instead of throwing.
  /// </remarks>
  /// <returns>The matching element, or null when there is none.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="document"/> is null, or <paramref name="id"/> is null or empty.</exception>
  public static XElement GetElementById(this XDocument document, string id)
  {
    if (document == null)
    {
      throw new ArgumentNullException("document");
    }

    if (string.IsNullOrEmpty(id))
    {
      throw new ArgumentNullException("id");
    }

    return document.Descendants().FirstOrDefault(
      element =>
      {
        var idAttribute = element.Attribute("id");

        return idAttribute != null && idAttribute.Value == id;
      });
  }

  #endregion

  #region XElement extensions

  /// <summary>Returns the value of the "id" attribute, or an empty string when it is absent.</summary>
  public static string GetId(this XElement element)
  {
    return element.GetAttributeValue("id", "");
  }

  /// <summary>Sets the "id" attribute; a null <paramref name="id"/> removes it.</summary>
  public static void SetId(this XElement element, string id)
  {
    element.SetAttributeValue("id", id);
  }

  /// <summary>Returns the value of the "class" attribute, or an empty string when it is absent.</summary>
  public static string GetClass(this XElement element)
  {
    return element.GetAttributeValue("class", "");
  }

  /// <summary>Sets the "class" attribute; a null <paramref name="class"/> removes it.</summary>
  public static void SetClass(this XElement element, string @class)
  {
    element.SetAttributeValue("class", @class);
  }

  /// <summary>Returns the value of the "style" attribute, or an empty string when it is absent.</summary>
  public static string GetStyle(this XElement element)
  {
    return element.GetAttributeValue("style", "");
  }

  /// <summary>Sets the "style" attribute; a null <paramref name="style"/> removes it.</summary>
  public static void SetStyle(this XElement element, string style)
  {
    element.SetAttributeValue("style", style);
  }

  /// <summary>
  /// Returns the value of the named attribute, or <paramref name="defaultValue"/> when the attribute is absent.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="element"/> is null, or <paramref name="attributeName"/> is null or empty.</exception>
  public static string GetAttributeValue(this XElement element, string attributeName, string defaultValue)
  {
    if (element == null)
    {
      throw new ArgumentNullException("element");
    }

    if (string.IsNullOrEmpty(attributeName))
    {
      throw new ArgumentNullException("attributeName");
    }

    var attribute = element.Attribute(attributeName);

    return attribute != null
             ? (attribute.Value ?? defaultValue)
             : defaultValue;
  }

  /// <summary>
  /// Sets the named attribute to <paramref name="value"/>, or removes the attribute when <paramref name="value"/> is null.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="element"/> is null, or <paramref name="attributeName"/> is null or empty.</exception>
  public static void SetAttributeValue(this XElement element, string attributeName, string value)
  {
    if (element == null)
    {
      throw new ArgumentNullException("element");
    }

    if (string.IsNullOrEmpty(attributeName))
    {
      throw new ArgumentNullException("attributeName");
    }

    if (value == null)
    {
      var attribute = element.Attribute(attributeName);

      if (attribute != null)
      {
        attribute.Remove();
      }
    }
    else
    {
      // Binds to the instance method XElement.SetAttributeValue(XName, object): instance
      // methods take precedence over extension methods, so this call does not recurse.
      element.SetAttributeValue(attributeName, value);
    }
  }

  /// <summary>
  /// Joins the non-empty attribute values of <paramref name="element"/> with <paramref name="separator"/>.
  /// </summary>
  /// <returns>The joined values; an empty string when no attribute has a non-empty value.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="element"/> or <paramref name="separator"/> is null.</exception>
  public static string GetAttributesString(this XElement element, string separator)
  {
    if (element == null)
    {
      throw new ArgumentNullException("element");
    }

    if (separator == null)
    {
      throw new ArgumentNullException("separator");
    }

    var nonEmptyValues = element.Attributes()
      .Select(attribute => attribute.Value)
      .Where(attributeValue => !string.IsNullOrEmpty(attributeValue));

    return string.Join(separator, nonEmptyValues);
  }

  /// <summary>
  /// Serializes the child nodes of <paramref name="container"/>, without formatting, into one string.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="container"/> is null.</exception>
  public static string GetInnerHtml(this XContainer container)
  {
    if (container == null)
    {
      throw new ArgumentNullException("container");
    }

    var resultSb = new StringBuilder();

    foreach (var childNode in container.Nodes())
    {
      try
      {
        resultSb.Append(childNode.ToString(SaveOptions.DisableFormatting));
      }
      catch (ArgumentException)
      {
        // Best effort: when a child cannot be serialized, fall back to the cleaned text
        // content of an element; any other kind of node is left out of the result.
        var childElement = childNode as XElement;

        if (childElement != null)
        {
          resultSb.Append(RemoveInvalidXMLChars(childElement.Value));
        }
      }
    }

    return resultSb.ToString();
  }

  /// <summary>
  /// Replaces the attributes and content of <paramref name="element"/> with the nodes parsed from <paramref name="html"/>.
  /// </summary>
  /// <remarks>
  /// The element is cleared with RemoveAll before parsing. If the parsed document has no root,
  /// the element is left empty.
  /// </remarks>
  /// <exception cref="ArgumentNullException"><paramref name="element"/> or <paramref name="html"/> is null.</exception>
  public static void SetInnerHtml(this XElement element, string html)
  {
    if (element == null)
    {
      throw new ArgumentNullException("element");
    }

    if (html == null)
    {
      throw new ArgumentNullException("html");
    }

    element.RemoveAll();

    var tmpElement = new SgmlDomBuilder().BuildDocument(html);

    if (tmpElement.Root == null)
    {
      return;
    }

    foreach (var node in tmpElement.Root.Nodes())
    {
      element.Add(node);
    }
  }

  #endregion

  #region XContainer extensions

  /// <summary>
  /// Returns all descendant elements whose local name equals <paramref name="tagName"/>, ignoring case.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="container"/> is null, or <paramref name="tagName"/> is null or empty.</exception>
  public static IEnumerable<XElement> GetElementsByTagName(this XContainer container, string tagName)
  {
    if (container == null)
    {
      throw new ArgumentNullException("container");
    }

    if (string.IsNullOrEmpty(tagName))
    {
      throw new ArgumentNullException("tagName");
    }

    return container.Descendants()
      .Where(e => tagName.Equals(e.Name.LocalName, StringComparison.OrdinalIgnoreCase));
  }

  /// <summary>
  /// Returns all descendant elements whose "class" attribute contains <paramref name="className"/>.
  /// </summary>
  /// <remarks>
  /// A single leading "." is stripped from <paramref name="className"/>. The match is a
  /// case-sensitive substring test on the whole attribute value, so "nav" also matches "navbar".
  /// </remarks>
  /// <exception cref="ArgumentNullException"><paramref name="container"/> is null, or <paramref name="className"/> is null or empty.</exception>
  public static IEnumerable<XElement> GetElementsByClass(this XContainer container, string className)
  {
    if (container == null)
    {
      throw new ArgumentNullException("container");
    }

    if (string.IsNullOrEmpty(className))
    {
      throw new ArgumentNullException("className");
    }

    if (className.StartsWith(".", StringComparison.Ordinal))
    {
      className = className.Remove(0, 1);
    }

    return container.Descendants()
      .Where(e => e.GetAttributeValue("class", "").Contains(className));
  }

  /// <summary>
  /// Returns the direct child elements whose local name equals <paramref name="tagName"/>, ignoring case.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="container"/> is null, or <paramref name="tagName"/> is null or empty.</exception>
  public static IEnumerable<XElement> GetChildrenByTagName(this XContainer container, string tagName)
  {
    if (container == null)
    {
      throw new ArgumentNullException("container");
    }

    if (string.IsNullOrEmpty(tagName))
    {
      throw new ArgumentNullException("tagName");
    }

    return container.Elements()
      .Where(e => tagName.Equals(e.Name.LocalName, StringComparison.OrdinalIgnoreCase));
  }

  #endregion
}
347 | }
348 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/DomSerializationParams.cs:
--------------------------------------------------------------------------------
1 |
2 | namespace ReadSharp.Ports.NReadability
3 | {
4 | public class DomSerializationParams
5 | {
6 | #region Factory methods
7 |
8 | /// <summary>
9 | /// Creates an instance of DomSerializationParams with parameters set to their defaults.
10 | /// </summary>
11 | public static DomSerializationParams CreateDefault()
12 | {
13 | return new DomSerializationParams();
14 | }
15 |
16 | #endregion
17 |
18 | #region Properties
19 |
20 | /// <summary>
21 | /// Determines whether the output will be formatted.
22 | /// </summary>
23 | public bool PrettyPrint { get; set; }
24 |
25 | /// <summary>
26 | /// Determines whether a meta tag with a content-type specification will be added/replaced in the output.
27 | /// </summary>
28 | public bool DontIncludeContentTypeMetaElement { get; set; }
29 |
30 | /// <summary>
31 | /// Determines whether mobile-specific elements (such as eg. meta HandheldFriendly) will be added/replaced in the output.
32 | /// </summary>
33 | public bool DontIncludeMobileSpecificMetaElements { get; set; }
34 |
35 | /// <summary>
36 | /// Determines whether DOCTYPE will be included at the beginning of the output.
37 | /// </summary>
38 | public bool DontIncludeDocTypeMetaElement { get; set; }
39 |
40 | /// <summary>
41 | /// Determines whether a meta tag with a generator specification will be added/replaced in the output.
42 | /// </summary>
43 | public bool DontIncludeGeneratorMetaElement { get; set; }
44 |
45 | /// <summary>
46 | /// If [true], replace all img-tags with placeholders.
47 | /// </summary>
48 | public bool ReplaceImagesWithPlaceholders { get; set; }
49 |
50 | /// <summary>
51 | /// Render complete Website or only the Body
52 | /// </summary>
53 | public bool BodyOnly { get; set; }
54 |
55 | /// <summary>
56 | /// Remove headline of website
57 | /// </summary>
58 | public bool NoHeadline { get; set; }
59 | #endregion
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/ElementsTraverser.cs:
--------------------------------------------------------------------------------
1 | /*
2 | * NReadability
3 | * http://code.google.com/p/nreadability/
4 | *
5 | * Copyright 2010 Marek Stój
6 | * http://immortal.pl/
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | using System;
22 | using System.Xml.Linq;
23 |
24 | namespace ReadSharp.Ports.NReadability
25 | {
/// <summary>
/// Invokes a visitor for an element and then, depth-first in document order, for every
/// element below it.
/// </summary>
public class ElementsTraverser
{
  private readonly Action<XElement> _elementVisitor;

  #region Constructor(s)

  /// <summary>
  /// Creates a traverser that calls <paramref name="elementVisitor"/> for each visited element.
  /// </summary>
  /// <param name="elementVisitor">Callback that receives each element.</param>
  /// <exception cref="ArgumentNullException"><paramref name="elementVisitor"/> is null.</exception>
  public ElementsTraverser(Action<XElement> elementVisitor)
  {
    if (elementVisitor == null)
    {
      throw new ArgumentNullException("elementVisitor");
    }

    _elementVisitor = elementVisitor;
  }

  #endregion

  #region Public methods

  /// <summary>
  /// Visits <paramref name="element"/> first and then recurses into its child elements.
  /// Child nodes that are not elements (text, comments, ...) are skipped.
  /// </summary>
  /// <param name="element">Root of the subtree to visit.</param>
  /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
  public void Traverse(XElement element)
  {
    if (element == null)
    {
      throw new ArgumentNullException("element");
    }

    _elementVisitor(element);

    var childNode = element.FirstNode;

    while (childNode != null)
    {
      // Read the successor before descending, so the walk does not depend on what the
      // visitor does to the child (e.g. detaching it from the tree).
      var nextChildNode = childNode.NextNode;

      var childElement = childNode as XElement;

      if (childElement != null)
      {
        Traverse(childElement);
      }

      childNode = nextChildNode;
    }
  }

  #endregion
}
67 | }
68 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/EncodedStringWriter.cs:
--------------------------------------------------------------------------------
1 | /*
2 | * NReadability
3 | * http://code.google.com/p/nreadability/
4 | *
5 | * Copyright 2010 Marek Stój
6 | * http://immortal.pl/
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | using System;
22 | using System.IO;
23 | using System.Text;
24 |
25 | namespace ReadSharp.Ports.NReadability
26 | {
/// <summary>
/// A <see cref="StringWriter"/> whose <see cref="Encoding"/> property reports the encoding
/// supplied at construction time (UTF-8 when none is supplied).
/// </summary>
internal class EncodedStringWriter : StringWriter
{
  private static readonly Encoding _DefaultEncoding = Encoding.UTF8;

  private readonly Encoding _reportedEncoding;

  #region Constructor(s)

  /// <summary>
  /// Creates a writer over <paramref name="sb"/> that reports the default encoding (UTF-8).
  /// </summary>
  public EncodedStringWriter(StringBuilder sb)
    : this(sb, _DefaultEncoding)
  {
  }

  /// <summary>
  /// Creates a writer over <paramref name="sb"/> that reports <paramref name="encoding"/>.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
  public EncodedStringWriter(StringBuilder sb, Encoding encoding)
    : base(sb)
  {
    if (encoding != null)
    {
      _reportedEncoding = encoding;
      return;
    }

    throw new ArgumentNullException("encoding");
  }

  #endregion

  #region Properties

  /// <summary>
  /// The encoding given to the constructor.
  /// </summary>
  public override Encoding Encoding
  {
    get { return _reportedEncoding; }
  }

  #endregion
}
62 | }
63 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/EnumerableExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 |
5 | namespace ReadSharp.Ports.NReadability
6 | {
/// <summary>
/// Extension methods for <see cref="IEnumerable{T}"/>.
/// </summary>
public static class EnumerableExtensions
{
  /// <summary>
  /// Returns the only one element in the sequence or default(T) if either the sequence doesn't contain any elements or it contains more than one element.
  /// </summary>
  /// <remarks>
  /// The sequence is enumerated once, and at most two elements are pulled from it. A sequence
  /// of two or more elements yields null even when some of those elements are null themselves.
  /// </remarks>
  /// <typeparam name="T">Element type; must be a reference type so that null can signal "none".</typeparam>
  /// <param name="enumerable">The sequence to inspect.</param>
  /// <exception cref="ArgumentNullException"><paramref name="enumerable"/> is null.</exception>
  public static T SingleOrNone<T>(this IEnumerable<T> enumerable)
    where T : class
  {
    if (enumerable == null)
    {
      throw new ArgumentNullException("enumerable");
    }

    using (var enumerator = enumerable.GetEnumerator())
    {
      if (!enumerator.MoveNext())
      {
        // no elements
        return null;
      }

      T onlyCandidate = enumerator.Current;

      if (enumerator.MoveNext())
      {
        // more than one element
        return null;
      }

      return onlyCandidate;
    }
  }
}
43 | }
44 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/Enums.cs:
--------------------------------------------------------------------------------
1 | /*
2 | * NReadability
3 | * http://code.google.com/p/nreadability/
4 | *
5 | * Copyright 2010 Marek Stój
6 | * http://immortal.pl/
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | namespace ReadSharp.Ports.NReadability
22 | {
/// <summary>
/// Selects the visual style applied to the extracted article.
/// </summary>
public enum ReadingStyle
{
  /// <summary>Newspaper style.</summary>
  Newspaper = 0,

  /// <summary>Novel style.</summary>
  Novel = 1,

  /// <summary>Ebook style.</summary>
  Ebook = 2,

  /// <summary>Terminal style.</summary>
  Terminal = 3
}
48 |
/// <summary>
/// Selects the width of the margin around the extracted article.
/// </summary>
public enum ReadingMargin
{
  /// <summary>Extra-narrow margin.</summary>
  XNarrow = 0,

  /// <summary>Narrow margin.</summary>
  Narrow = 1,

  /// <summary>Medium margin.</summary>
  Medium = 2,

  /// <summary>Wide margin.</summary>
  Wide = 3,

  /// <summary>Extra-wide margin.</summary>
  XWide = 4
}
79 |
/// <summary>
/// Selects the font size used for the extracted article.
/// </summary>
public enum ReadingSize
{
  /// <summary>Extra-small font.</summary>
  XSmall = 0,

  /// <summary>Small font.</summary>
  Small = 1,

  /// <summary>Medium font.</summary>
  Medium = 2,

  /// <summary>Large font.</summary>
  Large = 3,

  /// <summary>Extra-large font.</summary>
  XLarge = 4
}
110 | }
111 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/HtmlUtils.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace ReadSharp.Ports.NReadability
4 | {
/// <summary>
/// Helpers for pre-processing raw HTML markup.
/// </summary>
public static class HtmlUtils
{
  // Opening tag is matched by prefix so attributes (type, src, ...) are covered too.
  private const string ScriptTagStart = "<script";
  private const string ScriptTagEnd = "</script>";

  /// <summary>
  /// Removes every script element (opening tag, content and closing tag) from the given markup.
  /// </summary>
  /// <param name="htmlContent">HTML markup to strip.</param>
  /// <returns>
  /// The markup without script elements. Tag names are matched case-insensitively.
  /// If an opening script tag has no closing tag, everything from that opening tag to the end of the input is dropped.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="htmlContent"/> is null.</exception>
  public static string RemoveScriptTags(string htmlContent)
  {
    if (htmlContent == null)
    {
      throw new ArgumentNullException("htmlContent");
    }

    if (htmlContent.Length == 0)
    {
      return "";
    }

    string remaining = htmlContent;

    // Iterative on purpose: a page with a large number of script elements must not
    // cost one stack frame per element.
    while (true)
    {
      int indexOfScriptTagStart = remaining.IndexOf(ScriptTagStart, StringComparison.OrdinalIgnoreCase);

      if (indexOfScriptTagStart == -1)
      {
        return remaining;
      }

      int indexOfScriptTagEnd = remaining.IndexOf(ScriptTagEnd, indexOfScriptTagStart, StringComparison.OrdinalIgnoreCase);

      if (indexOfScriptTagEnd == -1)
      {
        // unterminated script element: nothing after the opening tag can be trusted as markup
        return remaining.Substring(0, indexOfScriptTagStart);
      }

      // Cut the element out and rescan the joined text from the beginning, so a script tag
      // that only forms once the pieces are concatenated is removed as well.
      remaining =
        remaining.Substring(0, indexOfScriptTagStart) +
        remaining.Substring(indexOfScriptTagEnd + ScriptTagEnd.Length);
    }
  }
}
40 | }
41 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/InternalErrorException.cs:
--------------------------------------------------------------------------------
1 | /*
2 | * NReadability
3 | * http://code.google.com/p/nreadability/
4 | *
5 | * Copyright 2010 Marek Stój
6 | * http://immortal.pl/
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | using System;
22 | using System.Runtime.Serialization;
23 |
24 | namespace ReadSharp.Ports.NReadability
25 | {
/// <summary>
/// An exception that is thrown when an internal error occurs in the application.
/// An internal error means that there is a bug in the application itself.
/// </summary>
public class InternalErrorException : Exception
{
  /// <summary>
  /// Initializes a new instance of the <see cref="InternalErrorException"/> class.
  /// </summary>
  public InternalErrorException()
  {
  }

  /// <summary>
  /// Initializes a new instance of the <see cref="InternalErrorException"/> class with a specified error message.
  /// </summary>
  /// <param name="message">The message that describes the error.</param>
  public InternalErrorException(string message)
    : base(message)
  {
  }

  /// <summary>
  /// Initializes a new instance of the <see cref="InternalErrorException"/> class with a specified error message
  /// and the exception that caused it.
  /// </summary>
  /// <param name="message">The error message that explains the reason for the exception.</param>
  /// <param name="innerException">The exception that is the cause of the current exception, or null if there is none.</param>
  public InternalErrorException(string message, Exception innerException)
    : base(message, innerException)
  {
  }
}
62 | }
63 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/MetaExtractor.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Xml.Linq;
5 |
6 | namespace ReadSharp.Ports.NReadability
7 | {
/// <summary>
/// Collects the meta and link elements found in the head of an HTML document and answers
/// metadata queries (description, image, favicon, charset) against them.
/// </summary>
public class MetaExtractor
{
  private const string CharsetMarker = "charset=";

  /// <summary>
  /// Ordered list of (selector, attribute) pairs. The order is the search priority, which is why a
  /// list is used rather than a dictionary (dictionary enumeration order is not guaranteed).
  /// The two-argument Add enables the same collection-initializer syntax a dictionary offers.
  /// </summary>
  private sealed class CandidateList : List<KeyValuePair<string, string>>
  {
    public void Add(string selector, string attribute)
    {
      Add(new KeyValuePair<string, string>(selector, attribute));
    }
  }

  /// <summary>
  /// Gets or sets a value indicating whether any meta or link element was found.
  /// </summary>
  /// <value>
  ///   <c>true</c> if the document head contained at least one meta or link element; otherwise, <c>false</c>.
  /// </value>
  public bool HasValue { get; set; }

  /// <summary>
  /// Gets the meta elements followed by the link elements of the document head.
  /// </summary>
  /// <value>
  /// The collected elements; an empty sequence when the document has no html root or no head.
  /// </value>
  public IEnumerable<XElement> Tags { get; private set; }


  /// <summary>
  /// Initializes a new instance of the <see cref="MetaExtractor"/> class.
  /// </summary>
  /// <param name="document">The document whose head is inspected.</param>
  /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
  public MetaExtractor(XDocument document)
  {
    if (document == null)
    {
      throw new ArgumentNullException("document");
    }

    // Start from "nothing found" so that every early exit below leaves the instance in a
    // usable state: the Get* methods then return null instead of failing on a null Tags.
    Tags = Enumerable.Empty<XElement>();
    HasValue = false;

    var documentRoot = document.Root;

    if (documentRoot == null || documentRoot.Name == null || !"html".Equals(documentRoot.Name.LocalName, StringComparison.OrdinalIgnoreCase))
    {
      return;
    }

    var headElement = documentRoot.GetChildrenByTagName("head").FirstOrDefault();

    if (headElement == null)
    {
      return;
    }

    IEnumerable<XElement> meta = headElement.GetChildrenByTagName("meta") ?? Enumerable.Empty<XElement>();
    IEnumerable<XElement> link = headElement.GetChildrenByTagName("link") ?? Enumerable.Empty<XElement>();

    Tags = meta.Concat(link);
    HasValue = Tags.Any();
  }


  /// <summary>
  /// Gets the meta description.
  /// </summary>
  /// <returns>The content of the Open Graph description or, failing that, of the plain description meta tag; null if neither exists.</returns>
  public string GetMetaDescription()
  {
    return SearchCandidates(new CandidateList
    {
      { "property|og:description", "content" },
      { "name|description", "content" }
    });
  }


  /// <summary>
  /// Gets the meta image.
  /// </summary>
  /// <returns>The first usable image reference among the known candidates, in priority order; null if none of the candidates matches.</returns>
  public string GetMetaImage()
  {
    return SearchCandidates(new CandidateList
    {
      { "property|og:image", "content" },
      { "rel|apple-touch-icon", "href" },
      { "rel|apple-touch-icon-precomposed", "href" },
      { "name|msapplication-square310x310logo", "content" },
      { "name|msapplication-square150x150logo", "content" },
      { "name|msapplication-square70x70logo", "content" },
      { "name|msapplication-TileImage", "content" },
      { "rel|image_src", "href" }
    });
  }


  /// <summary>
  /// Gets the meta favicon.
  /// </summary>
  /// <returns>The href of the icon link element; null if there is none.</returns>
  public string GetMetaFavicon()
  {
    return SearchCandidates(new CandidateList
    {
      { "rel|icon", "href" },
      { "rel|shortcut icon", "href" }
    });
  }


  /// <summary>
  /// Gets the charset.
  /// </summary>
  /// <returns>The declared charset name, trimmed and upper-cased (culture-invariant); null if the document declares none.</returns>
  public string GetCharset()
  {
    // find: <meta charset="utf-8">
    string result = SearchCandidates(new CandidateList
    {
      { "charset", "charset" }
    }, true);

    if (String.IsNullOrEmpty(result))
    {
      // find: <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
      string contentType = SearchCandidates(new CandidateList
      {
        { "http-equiv|Content-Type", "content" }
      });

      result = ExtractCharsetParameter(contentType);
    }

    if (result != null)
    {
      result = result.Trim();
    }

    // Invariant upper-casing: charset names are identifiers, not text
    // (a Turkish culture would otherwise turn "iso" into "İSO").
    return String.IsNullOrEmpty(result) ? null : result.ToUpperInvariant();
  }


  /// <summary>
  /// Extracts the value of the charset parameter from a Content-Type value.
  /// </summary>
  /// <param name="contentType">The Content-Type value, e.g. <c>text/html; charset=utf-8</c>. May be null.</param>
  /// <returns>The charset value without surrounding quotes or trailing parameters; null if no charset parameter is present.</returns>
  private static string ExtractCharsetParameter(string contentType)
  {
    if (String.IsNullOrEmpty(contentType))
    {
      return null;
    }

    int markerIndex = contentType.IndexOf(CharsetMarker, StringComparison.OrdinalIgnoreCase);

    if (markerIndex < 0)
    {
      // a media type alone (e.g. "text/html") is not a charset
      return null;
    }

    string value = contentType.Substring(markerIndex + CharsetMarker.Length);

    // drop any parameter that follows the charset
    int parameterEnd = value.IndexOf(';');
    if (parameterEnd >= 0)
    {
      value = value.Substring(0, parameterEnd);
    }

    return value.Trim().Trim('"', '\'').Trim();
  }


  /// <summary>
  /// Searches the candidates in order and returns the attribute value of the first usable match.
  /// </summary>
  /// <param name="candidates">
  /// Pairs of selector and attribute to read. A selector is <c>attribute|expectedValue</c>, or just
  /// <c>attribute</c> when <paramref name="simple"/> is true.
  /// </param>
  /// <param name="simple">
  /// When true, an element matches if the selector attribute is present at all; otherwise the attribute
  /// value must equal the expected value (case-insensitive).
  /// </param>
  /// <returns>
  /// The value of the first candidate that is longer than one character. If no candidate yields such
  /// a value, the value of the last candidate that matched an element (possibly empty), or null if nothing matched.
  /// </returns>
  private string SearchCandidates(IEnumerable<KeyValuePair<string, string>> candidates, bool simple = false)
  {
    string result = null;
    IEnumerable<XElement> tags = Tags ?? Enumerable.Empty<XElement>();

    foreach (var candidate in candidates)
    {
      string[] selector = candidate.Key.Split('|');
      string attributeName = selector[0];
      XElement element;

      if (simple)
      {
        element = tags.FirstOrDefault(item => item.GetAttributeValue(attributeName, null) != null);
      }
      else
      {
        string expectedValue = selector[1];

        element = tags.FirstOrDefault(item => String.Equals(item.GetAttributeValue(attributeName, null), expectedValue, StringComparison.OrdinalIgnoreCase));
      }

      if (element != null)
      {
        result = element.GetAttributeValue(candidate.Value, "");
      }

      // Values of one character or less are treated as unusable and the search moves on
      // to the next candidate (threshold kept from the original implementation).
      if (result != null && result.Length > 1)
      {
        break;
      }
    }

    return result;
  }
}
182 | }
183 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/NReadability.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 11.0
6 | Debug
7 | AnyCPU
8 | {14C3EE6A-54A4-4A37-8B56-D52A3802F1C2}
9 | Library
10 | Properties
11 | ReadSharp.Ports.NReadability
12 | ReadSharp.Ports.NReadability
13 | v4.5
14 | Profile259
15 | 512
16 | {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
17 |
18 |
19 |
20 |
21 | 4.0
22 | publish\
23 | true
24 | Disk
25 | false
26 | Foreground
27 | 7
28 | Days
29 | false
30 | false
31 | true
32 | 0
33 | 1.0.0.%2a
34 | false
35 | false
36 | true
37 |
38 |
39 | true
40 | full
41 | false
42 | bin\Debug\
43 | DEBUG;TRACE
44 | prompt
45 | 4
46 |
47 |
48 | pdbonly
49 | true
50 | bin\Release\
51 | TRACE
52 | prompt
53 | 4
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 | {9112414c-e2d1-43ba-a298-a89f77d94332}
86 | SgmlReader
87 |
88 |
89 |
90 |
91 | False
92 | .NET Framework 3.5 SP1 Client Profile
93 | false
94 |
95 |
96 | False
97 | .NET Framework 3.5 SP1
98 | false
99 |
100 |
101 |
102 |
109 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Resources;
2 | using System.Reflection;
3 | using System.Runtime.CompilerServices;
4 | using System.Runtime.InteropServices;
5 |
// Assembly identity: name and product shown for this library.
[assembly: AssemblyTitle("NReadabilityPCL")]
[assembly: AssemblyProduct("NReadabilityPCL")]

// Descriptive metadata (intentionally left blank apart from the copyright notice).
[assembly: AssemblyDescription("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyCopyright("Copyright © 2013")]
[assembly: AssemblyTrademark("")]

// Build configuration and culture; resources without an explicit culture are English.
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCulture("")]
[assembly: NeutralResourcesLanguage("en")]

// Version numbers are written as Major.Minor.Build.Revision.
// Build and Revision may be defaulted with '*' (e.g. "1.0.*"); here all four parts are fixed.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
31 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/Resources/readability.css:
--------------------------------------------------------------------------------
1 | /* Document */
2 | body {font-size: 100%;}
3 | #readability-logo,#arc90-logo,.footer-twitterLink,#readTools a,a.rdbTK-powered span{background-color:transparent!important;background-image:url(http://lab.arc90.com/experiments/readability/images/sprite-readability.png)!important;background-repeat:no-repeat!important;}
4 | #readOverlay {display:block;position:absolute;top:0;left:0;width:100%;}
5 | #readInner {line-height:1.4em;max-width:800px;margin:1em auto;}
6 | #readInner a {color:#039;text-decoration:none;}
7 | #readInner a:hover {text-decoration:underline;}
8 | #readInner img {float:left;clear:both;margin: 0 12px 12px 0;}
9 | #readInner h1 {display:block;width:100%;border-bottom:1px solid #333;font-size:1.2em;padding-bottom:.5em;margin-top:0;margin-bottom:.75em;}
10 | #readInner sup{line-height:.8em;}
11 | #readInner .page-separator{clear:both;display:block;font-size:.85em;filter:alpha(opacity=20);opacity:.20;text-align:center;}
12 | .style-apertura #readInner h1 {border-bottom-color:#ededed;}
13 | #readInner blockquote {margin-left:3em;margin-right:3em;}
14 | #readability-inner * {margin-bottom:16px;border:none;background:none;}
15 | /* Footer */
16 | #readFooter {display:block;border-top:1px solid #333;text-align:center;clear:both;overflow:hidden;}
17 | .style-apertura #readFooter {border-top-color:#ededed;}
18 | #rdb-footer-left {display:inline;float:left;margin-top:15px;width:285px;background-position:0 -36px;}
19 | .rdbTypekit #rdb-footer-left {width:475px;}
20 | #rdb-footer-left a,#rdb-footer-left a:link {float:left;}
21 | #readability-logo {display:inline;background-position:0 -36px;height:29px;width:189px;text-indent:-9000px;}
22 | #arc90-logo {display:inline;background-position:right -36px;height:29px;width:96px;text-indent:-9000px;}
23 | #readability-url {display:none;}
24 | .style-apertura #readability-logo {background-position:0 -67px;}
25 | .style-apertura #arc90-logo {background-position:right -67px;}
26 | #rdb-footer-right {display:inline;float:right;text-align:right;font-size:.75em;margin-top:18px;}
27 | #rdb-footer-right a {display:inline-block;float:left;overflow:visible;line-height:16px;vertical-align:baseline;}
28 | .footer-twitterLink {height:20px;margin-left:20px;padding:4px 0 0 28px;background-position:0 -123px;font-size:12px;}
29 | #rdb-footer-left .footer-twitterLink {display:none;margin-top:1px;padding-top:2px;}
30 | .rdbTypekit #rdb-footer-right .footer-twitterLink {display:none;}
31 | .rdbTypekit #rdb-footer-left .footer-twitterLink {display:inline-block!important;}
32 | a.rdbTK-powered,a.rdbTK-powered:link,a.rdbTK-powered:hover {font-size:16px;color:#858789!important;text-decoration:none!important;}
33 | a.rdbTK-powered span {display:inline-block;height:22px;margin-left:2px;padding:4px 0 0 26px;background-position:0 -146px!important;}
34 | .style-apertura #rdb-inverse,.style-athelas #rdb-athelas {display:block;}
35 | span.version {display:none;}
36 | /* Tools */
37 | #readTools {width:34px;height:150px;position:fixed;z-index:100;top:10px;left:10px;}
38 | #readTools a {overflow:hidden;margin-bottom:8px;display:block;opacity:.4;text-indent:-99999px;height:34px;width:34px;text-decoration:none;filter:alpha(opacity=40);}
39 | #reload-page {background-position:0 0;}
40 | #print-page {background-position:-36px 0;}
41 | #email-page {background-position:-72px 0;}
42 | #kindle-page {background-position:-108px 0;}
43 | #readTools a:hover {opacity:1;filter:alpha(opacity=100);}
44 | /* -- USER-CONFIGURABLE STYLING -- */
45 | /* Size */
46 | .size-x-small {font-size:.75em;}
47 | .size-small {font-size:.938em;}
48 | .size-medium {font-size:1.125em;}
49 | .size-large {font-size:1.375em;}
50 | .size-x-large {font-size:1.75em;}
51 | /* Style */
52 | .style-newspaper {font-family:"Times New Roman", Times, serif;background:#fbfbfb;color:#080000;}
53 | .style-newspaper h1 {text-transform:capitalize;font-family:Georgia, "Times New Roman", Times, serif;}
54 | .style-newspaper #readInner a {color:#0924e1;}
55 | .style-novel {font-family:"Palatino Linotype", "Book Antiqua", Palatino, serif;background:#f4eed9;color:#1d1916;}
56 | .style-novel #readInner a {color:#1856ba;}
57 | .style-ebook {font-family:Arial, Helvetica, sans-serif;background:#edebe8;color:#2c2d32;}
58 | .style-ebook #readInner a {color:#187dc9;}
59 | .style-ebook h1 {font-family:"Arial Black", Gadget, sans-serif;font-weight:400;}
60 | .style-terminal {font-family:"Lucida Console", Monaco, monospace;background:#1d4e2c;color:#c6ffc6;}
61 | .style-terminal #readInner a {color:#093;}
62 | /* Typekit */
63 | .style-apertura {font-family:"apertura-1", "apertura-2", sans-serif;background-color:#2d2828;color:#eae8e9;}
64 | .style-apertura #readInner a {color:#58b0ff;}
65 | .style-athelas {font-family:"athelas-1", "athelas-2", "Palatino Linotype", "Book Antiqua", Palatino, serif;background-color:#f7f7f7;color:#2b373d;}
66 | .style-athelas #readInner a {color:#1e83cb;}
67 | /* Margin */
68 | .margin-x-narrow {width:95%;}
69 | .margin-narrow {width:85%;}
70 | .margin-medium {width:75%;}
71 | .margin-wide {width:55%;}
72 | .margin-x-wide {width:35%;}
73 | /* -- END USER-CONFIGURABLE STYLING -- */
74 | /* -- DEBUG -- */
75 | .bug-green {background:#bbf9b0;border:4px solid green;}
76 | .bug-red {background:red;}
77 | .bug-yellow {background:#ffff8e;}
78 | .bug-blue {background:#bfdfff;}
79 | /* -- EMAIL / KINDLE POP UP -- */
80 | #kindle-container, #email-container {position:fixed;top:60px;left:50%;width:500px;height:490px;border:solid 3px #666;background-color:#fff;z-index:100!important;overflow:hidden;margin:0 0 0 -240px;padding:0;}
81 | /* Override html styling attributes */
82 | table, tr, td { background-color: transparent !important; }
83 |
--------------------------------------------------------------------------------
/PortablePorts/NReadability/SgmlDomBuilder.cs:
--------------------------------------------------------------------------------
1 | /*
2 | * NReadability
3 | * http://code.google.com/p/nreadability/
4 | *
5 | * Copyright 2010 Marek Stój
6 | * http://immortal.pl/
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | using System;
22 | using System.Text;
23 | using System.Text.RegularExpressions;
24 | using System.Xml;
25 | using System.Xml.Linq;
26 | using ReadSharp.Ports.Sgml;
27 | using System.IO;
28 |
29 | namespace ReadSharp.Ports.NReadability
30 | {
31 | ///
32 | /// A class for constructing a DOM from HTML markup.
33 | ///
34 | public class SgmlDomBuilder
35 | {
36 | #region Public methods
37 |
38 | ///
39 | /// Constructs a DOM (System.Xml.Linq.XDocument) from HTML markup.
40 | ///
41 | /// HTML markup from which the DOM is to be constructed.
42 | /// System.Linq.Xml.XDocument instance which is a DOM of the provided HTML markup.
43 | public XDocument BuildDocument(string htmlContent)
44 | {
45 | if (htmlContent == null)
46 | {
47 | throw new ArgumentNullException("htmlContent");
48 | }
49 |
50 | if (htmlContent.Trim().Length == 0)
51 | {
52 | return new XDocument();
53 | }
54 |
55 | // "trim end" htmlContent to ...