├── .editorconfig ├── .gitattributes ├── .gitignore ├── .nuget ├── NuGet.Config ├── NuGet.exe └── NuGet.targets ├── Assets ├── github-header.png └── readsharp.png ├── CHANGELOG.md ├── LICENSE-MIT ├── PortablePorts ├── NReadability │ ├── AttributeTransformationInput.cs │ ├── AttributeTransformationResult.cs │ ├── ChildNodesTraverser.cs │ ├── Consts.cs │ ├── DomExtensions.cs │ ├── DomSerializationParams.cs │ ├── ElementsTraverser.cs │ ├── EncodedStringWriter.cs │ ├── EnumerableExtensions.cs │ ├── Enums.cs │ ├── HtmlUtils.cs │ ├── InternalErrorException.cs │ ├── MetaExtractor.cs │ ├── NReadability.csproj │ ├── NReadabilityTranscoder.cs │ ├── Properties │ │ └── AssemblyInfo.cs │ ├── Resources │ │ └── readability.css │ ├── SgmlDomBuilder.cs │ ├── SgmlDomSerializer.cs │ ├── TranscodingInput.cs │ ├── TranscodingResult.cs │ ├── UtilityExtensions.cs │ ├── WebTranscodingInput.cs │ └── WebTranscodingResult.cs └── SgmlReader │ ├── HTMLspecial.ent │ ├── HTMLsymbol.ent │ ├── Html.dtd │ ├── Properties │ └── AssemblyInfo.cs │ ├── SgmlParser.cs │ ├── SgmlReader.cs │ ├── SgmlReader.csproj │ └── htmllat1.ent ├── README.md ├── ReadSharp.Tests ├── Properties │ └── AssemblyInfo.cs ├── ReadSharp.Tests.csproj ├── ReadTests.cs ├── TestsBase.cs ├── app.config └── packages.config ├── ReadSharp.sln └── ReadSharp ├── Encodings ├── Encoder.cs ├── Iso88591.cs ├── Iso885913.cs ├── Iso885915.cs ├── Iso88592.cs ├── Iso88593.cs ├── Iso88594.cs ├── Iso88595.cs ├── Iso88596.cs ├── Iso88597.cs ├── Iso88598.cs ├── Iso88599.cs ├── Windows1250.cs ├── Windows1251.cs ├── Windows1252.cs ├── Windows1253.cs ├── Windows1254.cs ├── Windows1255.cs ├── Windows1256.cs ├── Windows1257.cs └── Windows1258.cs ├── HtmlUtilities.cs ├── IReader.cs ├── Models ├── Article.cs ├── ArticleImage.cs ├── HttpOptions.cs ├── ReadOptions.cs └── Response.cs ├── Properties └── AssemblyInfo.cs ├── ReadException.cs ├── ReadSharp.csproj ├── ReadSharp.nuspec ├── Reader.cs └── packages.config /.editorconfig: 
-------------------------------------------------------------------------------- 1 | ; This file is for unifying the coding style for different editors and IDEs 2 | ; editorconfig.org 3 | 4 | root = true 5 | 6 | [*] 7 | end_of_line = crlf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | indent_style = space 12 | indent_size = 2 13 | 14 | [*.md] 15 | trim_trailing_whitespace = false 16 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text 3 | 4 | # Don't check these into the repo as LF to work around TeamCity bug 5 | *.xml -text 6 | *.targets -text 7 | 8 | # Custom for Visual Studio 9 | *.cs diff=csharp 10 | *.sln merge=union 11 | *.csproj merge=union 12 | *.vbproj merge=union 13 | *.fsproj merge=union 14 | *.dbproj merge=union 15 | 16 | # Denote all files that are truly binary and should not be modified. 17 | *.dll binary 18 | *.exe binary 19 | *.png binary 20 | *.ico binary 21 | *.snk binary 22 | *.pdb binary 23 | *.svg binary 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and files generated by popular Visual Studio add-ons. 
2 | 3 | # User-specific files 4 | *.suo 5 | *.user 6 | *.sln.docstates 7 | 8 | # Build results 9 | 10 | [Dd]ebug/ 11 | [Rr]elease/ 12 | x64/ 13 | build/ 14 | [Bb]in/ 15 | [Oo]bj/ 16 | 17 | # configuration 18 | Web.Release.config 19 | Web.Debug.config 20 | 21 | # MSTest test Results 22 | [Tt]est[Rr]esult*/ 23 | [Bb]uild[Ll]og.* 24 | 25 | *_i.c 26 | *_p.c 27 | *.ilk 28 | *.meta 29 | *.obj 30 | *.pch 31 | *.pdb 32 | *.pgc 33 | *.pgd 34 | *.rsp 35 | *.sbr 36 | *.tlb 37 | *.tli 38 | *.tlh 39 | *.tmp 40 | *.tmp_proj 41 | *.log 42 | *.vspscc 43 | *.vssscc 44 | .builds 45 | *.pidb 46 | *.log 47 | *.scc 48 | 49 | # Visual C++ cache files 50 | ipch/ 51 | *.aps 52 | *.ncb 53 | *.opensdf 54 | *.sdf 55 | *.cachefile 56 | 57 | # Visual Studio profiler 58 | *.psess 59 | *.vsp 60 | *.vspx 61 | 62 | # Guidance Automation Toolkit 63 | *.gpState 64 | 65 | # ReSharper is a .NET coding add-in 66 | _ReSharper*/ 67 | *.[Rr]e[Ss]harper 68 | 69 | # TeamCity is a build add-in 70 | _TeamCity* 71 | 72 | # DotCover is a Code Coverage Tool 73 | *.dotCover 74 | 75 | # NCrunch 76 | *.ncrunch* 77 | .*crunch*.local.xml 78 | 79 | # Installshield output folder 80 | [Ee]xpress/ 81 | 82 | # DocProject is a documentation generator add-in 83 | DocProject/buildhelp/ 84 | DocProject/Help/*.HxT 85 | DocProject/Help/*.HxC 86 | DocProject/Help/*.hhc 87 | DocProject/Help/*.hhk 88 | DocProject/Help/*.hhp 89 | DocProject/Help/Html2 90 | DocProject/Help/html 91 | 92 | # Click-Once directory 93 | publish/ 94 | 95 | # Publish Web Output 96 | *.Publish.xml 97 | *.pubxml 98 | 99 | # NuGet Packages Directory 100 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 101 | packages/ 102 | *.nupkg 103 | 104 | # Windows Azure Build Output 105 | csx 106 | *.build.csdef 107 | 108 | # Windows Store app package directory 109 | AppPackages/ 110 | 111 | # Others 112 | sql/ 113 | *.Cache 114 | ClientBin/ 115 | [Ss]tyle[Cc]op.* 116 | ~$* 117 | *~ 118 | *.dbmdl 119 | *.[Pp]ublish.xml 120 | *.pfx 121 | 
*.publishsettings 122 | .sass-cache 123 | node_modules 124 | *.[L|l]og 125 | tmp 126 | _old 127 | _tmp 128 | Gemfile.lock 129 | WebWorkbench.mswwsettings 130 | 131 | # RIA/Silverlight projects 132 | Generated_Code/ 133 | 134 | # Backup & report files from converting an old project file to a newer 135 | # Visual Studio version. Backup files are not needed, because we have git ;-) 136 | _UpgradeReport_Files/ 137 | Backup*/ 138 | UpgradeLog*.XML 139 | UpgradeLog*.htm 140 | 141 | # SQL Server files 142 | App_Data/*.mdf 143 | App_Data/*.ldf 144 | 145 | # ========================= 146 | # Windows detritus 147 | # ========================= 148 | 149 | # Windows image file caches 150 | Thumbs.db 151 | ehthumbs.db 152 | 153 | # Folder config file 154 | Desktop.ini 155 | 156 | # Recycle Bin used on file shares 157 | $RECYCLE.BIN/ 158 | 159 | # Mac crap 160 | .DS_Store 161 | 162 | 163 | # ========================= 164 | # Project 165 | # ========================= 166 | -------------------------------------------------------------------------------- /.nuget/NuGet.Config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.nuget/NuGet.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceee/ReadSharp/9d49aa9a8c2f6f4db60348ba5fd5692f51cd439b/.nuget/NuGet.exe -------------------------------------------------------------------------------- /.nuget/NuGet.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | $(MSBuildProjectDirectory)\..\ 5 | 6 | 7 | false 8 | 9 | 10 | false 11 | 12 | 13 | true 14 | 15 | 16 | false 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | $([System.IO.Path]::Combine($(SolutionDir), ".nuget")) 31 | $([System.IO.Path]::Combine($(ProjectDir), "packages.config")) 32 | 33 | 34 | 35 | 36 | 
$(SolutionDir).nuget 37 | packages.config 38 | 39 | 40 | 41 | 42 | $(NuGetToolsPath)\NuGet.exe 43 | @(PackageSource) 44 | 45 | "$(NuGetExePath)" 46 | mono --runtime=v4.0.30319 $(NuGetExePath) 47 | 48 | $(TargetDir.Trim('\\')) 49 | 50 | -RequireConsent 51 | -NonInteractive 52 | 53 | "$(SolutionDir) " 54 | "$(SolutionDir)" 55 | 56 | 57 | $(NuGetCommand) install "$(PackagesConfig)" -source "$(PackageSources)" $(NonInteractiveSwitch) $(RequireConsentSwitch) -solutionDir $(PaddedSolutionDir) 58 | $(NuGetCommand) pack "$(ProjectPath)" -Properties "Configuration=$(Configuration);Platform=$(Platform)" $(NonInteractiveSwitch) -OutputDirectory "$(PackageOutputDir)" -symbols 59 | 60 | 61 | 62 | RestorePackages; 63 | $(BuildDependsOn); 64 | 65 | 66 | 67 | 68 | $(BuildDependsOn); 69 | BuildPackage; 70 | 71 | 72 | 73 | 74 | 75 | 76 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 91 | 92 | 95 | 96 | 97 | 98 | 100 | 101 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /Assets/github-header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceee/ReadSharp/9d49aa9a8c2f6f4db60348ba5fd5692f51cd439b/Assets/github-header.png -------------------------------------------------------------------------------- /Assets/readsharp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceee/ReadSharp/9d49aa9a8c2f6f4db60348ba5fd5692f51cd439b/Assets/readsharp.png -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ### 6.2.2 2 | 3 | - Option (`PreferHTMLEncoding`) to either prefer HTML or HTTP encoding for generating content 4 | 5 | ### 6.2.0 6 | 7 | - Option to replace images with placeholders 8 
| 9 | ### 6.1.0 10 | 11 | - Add RAW HTML content to Article 12 | 13 | ### 6.0.0 14 | 15 | - Support for Universal apps (dropped SL and WP7 support) 16 | 17 | ### 5.0.0 18 | 19 | - HttpOptions for better control over the request 20 | - More reliable scraping of images 21 | - Remove unnecessary attributes from tags 22 | - Allow parsing of multi-page articles 23 | 24 | ### 4.2.3 25 | 26 | - add PrettyPrint option 27 | 28 | ### 4.2.2 29 | 30 | - use encoding found in HTTP headers in first iteration (fixes [issue #6](https://github.com/ceee/ReadSharp/issues/6)) 31 | 32 | ### 4.2.1 33 | 34 | - fixes [issue #3](https://github.com/ceee/ReadSharp/issues/3) 35 | 36 | ### 4.2.0 37 | 38 | - use custom encoders if not supported on platform (implemented for ISO-8859 and Windows range). 39 | 40 | ### 4.1.0 41 | 42 | - extract description, favicon and front image from meta tags 43 | - correct encoding - retry reading stream with charset from HTML headers, if not available in HTTP headers or not matching (fixes #1) 44 | 45 | ### 4.0.0 46 | 47 | - migrate PocketSharp.Reader to ReadSharp 48 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 cee, Tobias Klika 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/AttributeTransformationInput.cs: -------------------------------------------------------------------------------- 1 | using System.Xml.Linq; 2 | 3 | namespace ReadSharp.Ports.NReadability 4 | { 5 | public class AttributeTransformationInput 6 | { 7 | public string AttributeValue { get; set; } 8 | 9 | public string ArticleUrl { get; set; } 10 | 11 | public XElement Element { get; set; } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/AttributeTransformationResult.cs: -------------------------------------------------------------------------------- 1 | namespace ReadSharp.Ports.NReadability 2 | { 3 | public class AttributeTransformationResult 4 | { 5 | /// 6 | /// Result of the transformation. 7 | /// 8 | public string TransformedValue { get; set; } 9 | 10 | /// 11 | /// Name of the attribute that will be used to store the original value. Can be null. 
12 | /// 13 | public string OriginalValueAttributeName { get; set; } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/ChildNodesTraverser.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * NReadability 3 | * http://code.google.com/p/nreadability/ 4 | * 5 | * Copyright 2010 Marek Stój 6 | * http://immortal.pl/ 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | using System; 22 | using System.Xml.Linq; 23 | 24 | namespace ReadSharp.Ports.NReadability 25 | { 26 | internal class ChildNodesTraverser 27 | { 28 | private readonly Action _childNodeVisitor; 29 | 30 | #region Constructor(s) 31 | 32 | public ChildNodesTraverser(Action childNodeVisitor) 33 | { 34 | if (childNodeVisitor == null) 35 | { 36 | throw new ArgumentNullException("childNodeVisitor"); 37 | } 38 | 39 | _childNodeVisitor = childNodeVisitor; 40 | } 41 | 42 | #endregion 43 | 44 | #region Public methods 45 | 46 | public void Traverse(XNode node) 47 | { 48 | if (!(node is XContainer)) 49 | { 50 | throw new ArgumentException("The node must be an XContainer in order to traverse its children."); 51 | } 52 | 53 | var childNode = ((XContainer)node).FirstNode; 54 | 55 | while (childNode != null) 56 | { 57 | var nextChildNode = childNode.NextNode; 58 | 59 | _childNodeVisitor(childNode); 60 | 61 | childNode = nextChildNode; 62 | } 63 | } 64 | 65 | #endregion 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/Consts.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | 3 | namespace ReadSharp.Ports.NReadability 4 | { 5 | public static class Consts 6 | { 7 | private static readonly string _nReadabilityFullName; 8 | 9 | #region Constructor(s) 10 | 11 | static Consts() 12 | { 13 | _nReadabilityFullName = string.Format("NReadability {0}", typeof(Consts).GetTypeInfo().Assembly.FullName); 14 | } 15 | 16 | #endregion 17 | 18 | #region Properties 19 | 20 | public static string NReadabilityFullName 21 | { 22 | get { return _nReadabilityFullName; } 23 | } 24 | 25 | #endregion 26 | 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/DomExtensions.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * 
NReadability 3 | * http://code.google.com/p/nreadability/ 4 | * 5 | * Copyright 2010 Marek Stój 6 | * http://immortal.pl/ 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | using System; 22 | using System.Collections.Generic; 23 | using System.Linq; 24 | using System.Text; 25 | using System.Text.RegularExpressions; 26 | using System.Xml.Linq; 27 | 28 | namespace ReadSharp.Ports.NReadability 29 | { 30 | public static class DomExtensions 31 | { 32 | // filters control characters but allows only properly-formed surrogate sequences 33 | private static Regex _invalidXMLChars = new Regex(@"(? 
36 | /// removes any unusual unicode characters that can't be encoded into XML 37 | /// 38 | public static string RemoveInvalidXMLChars(string text) 39 | { 40 | if (String.IsNullOrEmpty(text)) return ""; 41 | return _invalidXMLChars.Replace(text, ""); 42 | } 43 | 44 | #region XDocument extensions 45 | 46 | public static XElement GetBody(this XDocument document) 47 | { 48 | if (document == null) 49 | { 50 | throw new ArgumentNullException("document"); 51 | } 52 | 53 | var documentRoot = document.Root; 54 | 55 | if (documentRoot == null) 56 | { 57 | return null; 58 | } 59 | 60 | return documentRoot.GetElementsByTagName("body").FirstOrDefault(); 61 | } 62 | 63 | public static string GetTitle(this XDocument document) 64 | { 65 | if (document == null) 66 | { 67 | throw new ArgumentNullException("document"); 68 | } 69 | 70 | var documentRoot = document.Root; 71 | 72 | if (documentRoot == null) 73 | { 74 | return null; 75 | } 76 | 77 | var headElement = documentRoot.GetElementsByTagName("head").FirstOrDefault(); 78 | 79 | if (headElement == null) 80 | { 81 | return ""; 82 | } 83 | 84 | var titleElement = headElement.GetChildrenByTagName("title").FirstOrDefault(); 85 | 86 | if (titleElement == null) 87 | { 88 | return ""; 89 | } 90 | 91 | return (titleElement.Value ?? 
"").Trim(); 92 | } 93 | 94 | public static XElement GetElementById(this XDocument document, string id) 95 | { 96 | if (document == null) 97 | { 98 | throw new ArgumentNullException("document"); 99 | } 100 | 101 | if (string.IsNullOrEmpty(id)) 102 | { 103 | throw new ArgumentNullException("id"); 104 | } 105 | 106 | return 107 | (from element in document.Descendants() 108 | let idAttribute = element.Attribute("id") 109 | where idAttribute != null && idAttribute.Value == id 110 | select element).SingleOrDefault(); 111 | } 112 | 113 | #endregion 114 | 115 | #region XElement extensions 116 | 117 | public static string GetId(this XElement element) 118 | { 119 | return element.GetAttributeValue("id", ""); 120 | } 121 | 122 | public static void SetId(this XElement element, string id) 123 | { 124 | element.SetAttributeValue("id", id); 125 | } 126 | 127 | public static string GetClass(this XElement element) 128 | { 129 | return element.GetAttributeValue("class", ""); 130 | } 131 | 132 | public static void SetClass(this XElement element, string @class) 133 | { 134 | element.SetAttributeValue("class", @class); 135 | } 136 | 137 | public static string GetStyle(this XElement element) 138 | { 139 | return element.GetAttributeValue("style", ""); 140 | } 141 | 142 | public static void SetStyle(this XElement element, string style) 143 | { 144 | element.SetAttributeValue("style", style); 145 | } 146 | 147 | public static string GetAttributeValue(this XElement element, string attributeName, string defaultValue) 148 | { 149 | if (element == null) 150 | { 151 | throw new ArgumentNullException("element"); 152 | } 153 | 154 | if (string.IsNullOrEmpty(attributeName)) 155 | { 156 | throw new ArgumentNullException("attributeName"); 157 | } 158 | 159 | var attribute = element.Attribute(attributeName); 160 | 161 | return attribute != null 162 | ? (attribute.Value ?? 
defaultValue) 163 | : defaultValue; 164 | } 165 | 166 | public static void SetAttributeValue(this XElement element, string attributeName, string value) 167 | { 168 | if (element == null) 169 | { 170 | throw new ArgumentNullException("element"); 171 | } 172 | 173 | if (string.IsNullOrEmpty(attributeName)) 174 | { 175 | throw new ArgumentNullException("attributeName"); 176 | } 177 | 178 | if (value == null) 179 | { 180 | var attribute = element.Attribute(attributeName); 181 | 182 | if (attribute != null) 183 | { 184 | attribute.Remove(); 185 | } 186 | } 187 | else 188 | { 189 | element.SetAttributeValue(attributeName, value); 190 | } 191 | } 192 | 193 | public static string GetAttributesString(this XElement element, string separator) 194 | { 195 | if (element == null) 196 | { 197 | throw new ArgumentNullException("element"); 198 | } 199 | 200 | if (separator == null) 201 | { 202 | throw new ArgumentNullException("separator"); 203 | } 204 | 205 | var resultSb = new StringBuilder(); 206 | bool isFirst = true; 207 | 208 | element.Attributes().Aggregate( 209 | resultSb, 210 | (sb, attribute) => 211 | { 212 | string attributeValue = attribute.Value; 213 | 214 | if (string.IsNullOrEmpty(attributeValue)) 215 | { 216 | return sb; 217 | } 218 | 219 | if (!isFirst) 220 | { 221 | resultSb.Append(separator); 222 | } 223 | 224 | isFirst = false; 225 | 226 | sb.Append(attribute.Value); 227 | 228 | return sb; 229 | }); 230 | 231 | return resultSb.ToString(); 232 | } 233 | 234 | public static string GetInnerHtml(this XContainer container) 235 | { 236 | if (container == null) 237 | { 238 | throw new ArgumentNullException("container"); 239 | } 240 | 241 | var resultSb = new StringBuilder(); 242 | 243 | foreach (var childNode in container.Nodes()) 244 | { 245 | try 246 | { 247 | resultSb.Append(childNode.ToString(SaveOptions.DisableFormatting)); 248 | } 249 | catch (ArgumentException) 250 | { 251 | if (childNode is XElement) 252 | { 253 | 
resultSb.Append(RemoveInvalidXMLChars((childNode as XElement).Value)); 254 | } 255 | } 256 | } 257 | 258 | return resultSb.ToString(); 259 | } 260 | 261 | public static void SetInnerHtml(this XElement element, string html) 262 | { 263 | if (element == null) 264 | { 265 | throw new ArgumentNullException("element"); 266 | } 267 | 268 | if (html == null) 269 | { 270 | throw new ArgumentNullException("html"); 271 | } 272 | 273 | element.RemoveAll(); 274 | 275 | var tmpElement = new SgmlDomBuilder().BuildDocument(html); 276 | 277 | if (tmpElement.Root == null) 278 | { 279 | return; 280 | } 281 | 282 | foreach (var node in tmpElement.Root.Nodes()) 283 | { 284 | element.Add(node); 285 | } 286 | } 287 | 288 | #endregion 289 | 290 | #region XContainer extensions 291 | 292 | public static IEnumerable GetElementsByTagName(this XContainer container, string tagName) 293 | { 294 | if (container == null) 295 | { 296 | throw new ArgumentNullException("container"); 297 | } 298 | 299 | if (string.IsNullOrEmpty(tagName)) 300 | { 301 | throw new ArgumentNullException("tagName"); 302 | } 303 | 304 | return container.Descendants() 305 | .Where(e => tagName.Equals(e.Name.LocalName, StringComparison.OrdinalIgnoreCase)); 306 | } 307 | 308 | public static IEnumerable GetElementsByClass(this XContainer container, string className) 309 | { 310 | if (container == null) 311 | { 312 | throw new ArgumentNullException("container"); 313 | } 314 | 315 | if (string.IsNullOrEmpty(className)) 316 | { 317 | throw new ArgumentNullException("className"); 318 | } 319 | 320 | if (className.StartsWith(".")) 321 | { 322 | className = className.Remove(0, 1); 323 | } 324 | 325 | return container.Descendants() 326 | .Where(e => e != null && e.GetAttributeValue("class", "").Contains(className)); //tagName.Equals(e.Name.LocalName, StringComparison.OrdinalIgnoreCase)); 327 | } 328 | 329 | public static IEnumerable GetChildrenByTagName(this XContainer container, string tagName) 330 | { 331 | if (container == null) 
332 | { 333 | throw new ArgumentNullException("container"); 334 | } 335 | 336 | if (string.IsNullOrEmpty(tagName)) 337 | { 338 | throw new ArgumentNullException("tagName"); 339 | } 340 | 341 | return container.Elements() 342 | .Where(e => e.Name != null && tagName.Equals(e.Name.LocalName, StringComparison.OrdinalIgnoreCase)); 343 | } 344 | 345 | #endregion 346 | } 347 | } 348 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/DomSerializationParams.cs: -------------------------------------------------------------------------------- 1 |  2 | namespace ReadSharp.Ports.NReadability 3 | { 4 | public class DomSerializationParams 5 | { 6 | #region Factory methods 7 | 8 | /// 9 | /// Creates an instance of DomSerializationParams with parameters set to their defaults. 10 | /// 11 | public static DomSerializationParams CreateDefault() 12 | { 13 | return new DomSerializationParams(); 14 | } 15 | 16 | #endregion 17 | 18 | #region Properties 19 | 20 | /// 21 | /// Determines whether the output will be formatted. 22 | /// 23 | public bool PrettyPrint { get; set; } 24 | 25 | /// 26 | /// Determines whether a meta tag with a content-type specification will be added/replaced in the output. 27 | /// 28 | public bool DontIncludeContentTypeMetaElement { get; set; } 29 | 30 | /// 31 | /// Determines whether mobile-specific elements (such as meta HandheldFriendly) will be added/replaced in the output. 32 | /// 33 | public bool DontIncludeMobileSpecificMetaElements { get; set; } 34 | 35 | /// 36 | /// Determines whether DOCTYPE will be included at the beginning of the output. 37 | /// 38 | public bool DontIncludeDocTypeMetaElement { get; set; } 39 | 40 | /// 41 | /// Determines whether a meta tag with a generator specification will be added/replaced in the output. 42 | /// 43 | public bool DontIncludeGeneratorMetaElement { get; set; } 44 | 45 | /// 46 | /// If [true], replace all img-tags with placeholders. 
47 | /// 48 | public bool ReplaceImagesWithPlaceholders { get; set; } 49 | 50 | /// 51 | /// Render complete Website or only the Body 52 | /// 53 | public bool BodyOnly { get; set; } 54 | 55 | /// 56 | /// Remove headline of website 57 | /// 58 | public bool NoHeadline { get; set; } 59 | #endregion 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/ElementsTraverser.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * NReadability 3 | * http://code.google.com/p/nreadability/ 4 | * 5 | * Copyright 2010 Marek Stój 6 | * http://immortal.pl/ 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | using System; 22 | using System.Xml.Linq; 23 | 24 | namespace ReadSharp.Ports.NReadability 25 | { 26 | public class ElementsTraverser 27 | { 28 | private readonly Action _elementVisitor; 29 | 30 | #region Constructor(s) 31 | 32 | public ElementsTraverser(Action elementVisitor) 33 | { 34 | if (elementVisitor == null) 35 | { 36 | throw new ArgumentNullException("elementVisitor"); 37 | } 38 | 39 | _elementVisitor = elementVisitor; 40 | } 41 | 42 | #endregion 43 | 44 | #region Public methods 45 | 46 | public void Traverse(XElement element) 47 | { 48 | _elementVisitor(element); 49 | 50 | var childNode = element.FirstNode; 51 | 52 | while (childNode != null) 53 | { 54 | var nextChildNode = childNode.NextNode; 55 | 56 | if (childNode is XElement) 57 | { 58 | Traverse((XElement)childNode); 59 | } 60 | 61 | childNode = nextChildNode; 62 | } 63 | } 64 | 65 | #endregion 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/EncodedStringWriter.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * NReadability 3 | * http://code.google.com/p/nreadability/ 4 | * 5 | * Copyright 2010 Marek Stój 6 | * http://immortal.pl/ 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | using System; 22 | using System.IO; 23 | using System.Text; 24 | 25 | namespace ReadSharp.Ports.NReadability 26 | { 27 | internal class EncodedStringWriter : StringWriter 28 | { 29 | private static readonly Encoding _DefaultEncoding = Encoding.UTF8; 30 | 31 | private readonly Encoding _encoding; 32 | 33 | #region Constructor(s) 34 | 35 | public EncodedStringWriter(StringBuilder sb, Encoding encoding) 36 | : base(sb) 37 | { 38 | if (encoding == null) 39 | { 40 | throw new ArgumentNullException("encoding"); 41 | } 42 | 43 | _encoding = encoding; 44 | } 45 | 46 | public EncodedStringWriter(StringBuilder sb) 47 | : this(sb, _DefaultEncoding) 48 | { 49 | } 50 | 51 | #endregion 52 | 53 | #region Properties 54 | 55 | public override Encoding Encoding 56 | { 57 | get { return _encoding; } 58 | } 59 | 60 | #endregion 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/EnumerableExtensions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | 5 | namespace ReadSharp.Ports.NReadability 6 | { 7 | public static class EnumerableExtensions 8 | { 9 | /// 10 | /// Returns the only element in the sequence, or default(T) if the sequence either doesn't contain any elements or contains more than one element. 
11 | /// 12 | public static T SingleOrNone(this IEnumerable enumerable) 13 | where T : class 14 | { 15 | // ReSharper disable PossibleMultipleEnumeration 16 | 17 | if (enumerable == null) 18 | { 19 | throw new ArgumentNullException("enumerable"); 20 | } 21 | 22 | T firstElement = enumerable.FirstOrDefault(); 23 | 24 | if (firstElement == null) 25 | { 26 | // no elements 27 | return null; 28 | } 29 | 30 | T secondElement = enumerable.Skip(1).FirstOrDefault(); 31 | 32 | if (secondElement != null) 33 | { 34 | // more than one element 35 | return null; 36 | } 37 | 38 | return firstElement; 39 | 40 | // ReSharper restore PossibleMultipleEnumeration 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/Enums.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * NReadability 3 | * http://code.google.com/p/nreadability/ 4 | * 5 | * Copyright 2010 Marek Stój 6 | * http://immortal.pl/ 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | namespace ReadSharp.Ports.NReadability 22 | { 23 | /// 24 | /// Determines how the extracted article will be styled. 25 | /// 26 | public enum ReadingStyle 27 | { 28 | /// 29 | /// Newspaper style. 30 | /// 31 | Newspaper, 32 | 33 | /// 34 | /// Novel style. 35 | /// 36 | Novel, 37 | 38 | /// 39 | /// Ebook style. 
/// <summary>
/// String-level helpers for cleaning up raw HTML markup.
/// </summary>
public static class HtmlUtils
{
  // NOTE(review): both literals were stripped from the dump this block was reviewed from
  // (the text between them was swallowed as if it were a tag); they are reconstructed
  // from the variable names and the surviving `.Length` arithmetic - confirm against the repository.
  private const string ScriptTagStart = "<script";
  private const string ScriptTagEnd = "</script>";

  /// <summary>
  /// Removes every script block (from an opening script tag up to and including its
  /// closing tag) from the markup. Matching is case-insensitive.
  /// </summary>
  /// <param name="htmlContent">The markup to clean; must not be null.</param>
  /// <returns>
  /// The markup without script blocks. If an opening script tag has no closing tag,
  /// everything from that opening tag to the end of the input is dropped.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="htmlContent"/> is null.</exception>
  public static string RemoveScriptTags(string htmlContent)
  {
    if (htmlContent == null)
    {
      throw new ArgumentNullException("htmlContent");
    }

    // Iterative on purpose: one stack frame per script block would let a page with
    // very many script tags exhaust the stack.
    while (htmlContent.Length > 0)
    {
      // Always search from the start: removing a block can join the text on either
      // side into a new opening tag, and that one has to be removed as well.
      int indexOfScriptTagStart = htmlContent.IndexOf(ScriptTagStart, StringComparison.OrdinalIgnoreCase);

      if (indexOfScriptTagStart == -1)
      {
        break;
      }

      int indexOfScriptTagEnd = htmlContent.IndexOf(ScriptTagEnd, indexOfScriptTagStart, StringComparison.OrdinalIgnoreCase);

      if (indexOfScriptTagEnd == -1)
      {
        // Unterminated script block: drop the rest of the document.
        return htmlContent.Substring(0, indexOfScriptTagStart);
      }

      int blockLength = indexOfScriptTagEnd + ScriptTagEnd.Length - indexOfScriptTagStart;

      htmlContent = htmlContent.Remove(indexOfScriptTagStart, blockLength);
    }

    return htmlContent;
  }
}
40 | } 41 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/InternalErrorException.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * NReadability 3 | * http://code.google.com/p/nreadability/ 4 | * 5 | * Copyright 2010 Marek Stój 6 | * http://immortal.pl/ 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | using System; 22 | using System.Runtime.Serialization; 23 | 24 | namespace ReadSharp.Ports.NReadability 25 | { 26 | /// 27 | /// An exception that is thrown when an internal error occurs in the application. 28 | /// Internal error in the application means that there is a bug in the application. 29 | /// 30 | public class InternalErrorException : Exception 31 | { 32 | #region Constructor(s) 33 | 34 | /// 35 | /// Initializes a new instance of the InternalErrorException class with a specified error message and a reference to the inner exception that is the cause of this exception. 36 | /// 37 | /// The error message that explains the reason for the exception. 38 | /// The exception that is the cause of the current exception, or a null reference (Nothing in Visual Basic) if no inner exception is specified. 
/// <summary>
/// Collects the &lt;meta&gt; and &lt;link&gt; elements found directly under the &lt;head&gt;
/// of an HTML document and looks up description, image, favicon and charset values in them.
/// </summary>
public class MetaExtractor
{
  // Marker that introduces the charset parameter inside a Content-Type value.
  private const string CharsetMarker = "charset=";

  /// <summary>
  /// Gets or sets a value indicating whether the document had an html root with a head
  /// that contains at least one meta or link element.
  /// </summary>
  public bool HasValue { get; set; }

  /// <summary>
  /// Gets the meta elements followed by the link elements of the document head.
  /// Never null; empty when the document has no usable head.
  /// </summary>
  public IEnumerable<XElement> Tags { get; private set; }


  /// <summary>
  /// Initializes a new instance of the <see cref="MetaExtractor"/> class.
  /// </summary>
  /// <param name="document">The document whose head is inspected.</param>
  /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
  public MetaExtractor(XDocument document)
  {
    if (document == null)
    {
      throw new ArgumentNullException("document");
    }

    // Start from an empty tag list so the Get* methods return null instead of
    // failing with a NullReferenceException when the document has no usable head.
    Tags = Enumerable.Empty<XElement>();
    HasValue = false;

    var documentRoot = document.Root;

    if (documentRoot == null || documentRoot.Name == null || !"html".Equals(documentRoot.Name.LocalName, StringComparison.OrdinalIgnoreCase))
    {
      return;
    }

    var headElement = documentRoot.GetChildrenByTagName("head").FirstOrDefault();

    if (headElement == null)
    {
      return;
    }

    IEnumerable<XElement> meta = headElement.GetChildrenByTagName("meta") ?? Enumerable.Empty<XElement>();
    IEnumerable<XElement> link = headElement.GetChildrenByTagName("link") ?? Enumerable.Empty<XElement>();

    Tags = meta.Concat(link);
    HasValue = Tags.Any();
  }


  /// <summary>
  /// Gets the page description from og:description or the description meta tag.
  /// </summary>
  /// <returns>The description, or null when no candidate tag is present.</returns>
  public string GetMetaDescription()
  {
    return SearchCandidates(new Dictionary<string, string>()
    {
      { "property|og:description", "content" },
      { "name|description", "content" }
    });
  }


  /// <summary>
  /// Gets the page image from og:image, touch icons, tile images or image_src.
  /// </summary>
  /// <returns>The image reference, or null when no candidate tag is present.</returns>
  public string GetMetaImage()
  {
    return SearchCandidates(new Dictionary<string, string>()
    {
      { "property|og:image", "content" },
      { "rel|apple-touch-icon", "href" },
      { "rel|apple-touch-icon-precomposed", "href"},
      { "name|msapplication-square310x310logo", "content" },
      { "name|msapplication-square150x150logo", "content" },
      { "name|msapplication-square70x70logo", "content" },
      { "name|msapplication-TileImage", "content" },
      { "rel|image_src", "href" }
    });
  }


  /// <summary>
  /// Gets the favicon reference from the icon / shortcut icon link tags.
  /// </summary>
  /// <returns>The favicon reference, or null when no candidate tag is present.</returns>
  public string GetMetaFavicon()
  {
    return SearchCandidates(new Dictionary<string, string>()
    {
      { "rel|icon", "href" },
      { "rel|shortcut icon", "href" }
    });
  }


  /// <summary>
  /// Gets the charset declared by the document head.
  /// </summary>
  /// <returns>The upper-cased charset name, or null when the head declares none.</returns>
  public string GetCharset()
  {
    // find: <meta charset="utf-8">
    string result = SearchCandidates(new Dictionary<string, string>()
    {
      { "charset", "charset" }
    }, true);

    if (String.IsNullOrEmpty(result))
    {
      // find: <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
      string contentType = SearchCandidates(new Dictionary<string, string>()
      {
        { "http-equiv|Content-Type", "content" }
      });

      // A Content-Type without a charset parameter declares no charset at all;
      // the media type itself must not be reported as one.
      result = ExtractCharsetParameter(contentType);
    }

    if (result != null)
    {
      result = result.Trim();
    }

    // Invariant casing: charset names are identifiers, and culture-sensitive
    // upper-casing would corrupt them under e.g. a Turkish culture.
    return String.IsNullOrEmpty(result) ? null : result.ToUpperInvariant();
  }


  /// <summary>
  /// Extracts the value of the charset parameter from a Content-Type value.
  /// </summary>
  /// <param name="contentType">A value such as "text/html; charset=utf-8"; may be null.</param>
  /// <returns>The bare charset value, or null when there is no charset parameter.</returns>
  private static string ExtractCharsetParameter(string contentType)
  {
    if (String.IsNullOrEmpty(contentType))
    {
      return null;
    }

    int markerIndex = contentType.IndexOf(CharsetMarker, StringComparison.OrdinalIgnoreCase);

    if (markerIndex < 0)
    {
      return null;
    }

    string charset = contentType.Substring(markerIndex + CharsetMarker.Length);

    // Drop any parameters that follow the charset.
    int parameterEnd = charset.IndexOf(';');

    if (parameterEnd >= 0)
    {
      charset = charset.Substring(0, parameterEnd);
    }

    return charset.Trim(' ', '\t', '"', '\'');
  }


  /// <summary>
  /// Walks the candidates and returns the requested attribute of the first matching tag.
  /// </summary>
  /// <param name="candidates">
  /// Maps "attribute|expectedValue" (or just "attribute" in simple mode) to the name of
  /// the attribute whose value is returned.
  /// NOTE(review): the candidate priority relies on the dictionary enumerating in
  /// insertion order, which is not a documented guarantee - confirm or switch to a list.
  /// </param>
  /// <param name="simple">
  /// When true, a tag matches if it merely carries the attribute; otherwise the attribute
  /// value must equal the expected value (case-insensitively).
  /// </param>
  /// <returns>
  /// The first value longer than one character; otherwise the last value read
  /// (possibly empty), or null when no tag matched at all.
  /// </returns>
  private string SearchCandidates(Dictionary<string, string> candidates, bool simple = false)
  {
    string result = null;

    foreach (var candidate in candidates)
    {
      string[] type = candidate.Key.Split('|');
      XElement element;

      if (simple)
      {
        element = Tags.FirstOrDefault(item => item.GetAttributeValue(type[0], null) != null);
      }
      else
      {
        element = Tags.FirstOrDefault(item => String.Equals(item.GetAttributeValue(type[0], null), type[1], StringComparison.OrdinalIgnoreCase));
      }

      if (element != null)
      {
        result = element.GetAttributeValue(candidate.Value, "");
      }

      if (result != null && result.Length > 1)
      {
        break;
      }
    }

    return result;
  }
}
Properties 11 | ReadSharp.Ports.NReadability 12 | ReadSharp.Ports.NReadability 13 | v4.5 14 | Profile259 15 | 512 16 | {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} 17 | 18 | 19 | 20 | 21 | 4.0 22 | publish\ 23 | true 24 | Disk 25 | false 26 | Foreground 27 | 7 28 | Days 29 | false 30 | false 31 | true 32 | 0 33 | 1.0.0.%2a 34 | false 35 | false 36 | true 37 | 38 | 39 | true 40 | full 41 | false 42 | bin\Debug\ 43 | DEBUG;TRACE 44 | prompt 45 | 4 46 | 47 | 48 | pdbonly 49 | true 50 | bin\Release\ 51 | TRACE 52 | prompt 53 | 4 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | {9112414c-e2d1-43ba-a298-a89f77d94332} 86 | SgmlReader 87 | 88 | 89 | 90 | 91 | False 92 | .NET Framework 3.5 SP1 Client Profile 93 | false 94 | 95 | 96 | False 97 | .NET Framework 3.5 SP1 98 | false 99 | 100 | 101 | 102 | 109 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Resources; 2 | using System.Reflection; 3 | using System.Runtime.CompilerServices; 4 | using System.Runtime.InteropServices; 5 | 6 | // General Information about an assembly is controlled through the following 7 | // set of attributes. Change these attribute values to modify the information 8 | // associated with an assembly. 
9 | [assembly: AssemblyTitle("NReadabilityPCL")] 10 | [assembly: AssemblyDescription("")] 11 | [assembly: AssemblyConfiguration("")] 12 | [assembly: AssemblyCompany("")] 13 | [assembly: AssemblyProduct("NReadabilityPCL")] 14 | [assembly: AssemblyCopyright("Copyright © 2013")] 15 | [assembly: AssemblyTrademark("")] 16 | [assembly: AssemblyCulture("")] 17 | [assembly: NeutralResourcesLanguage("en")] 18 | 19 | // Version information for an assembly consists of the following four values: 20 | // 21 | // Major Version 22 | // Minor Version 23 | // Build Number 24 | // Revision 25 | // 26 | // You can specify all the values or you can default the Build and Revision Numbers 27 | // by using the '*' as shown below: 28 | // [assembly: AssemblyVersion("1.0.*")] 29 | [assembly: AssemblyVersion("1.0.0.0")] 30 | [assembly: AssemblyFileVersion("1.0.0.0")] 31 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/Resources/readability.css: -------------------------------------------------------------------------------- 1 | /* Document */ 2 | body {font-size: 100%;} 3 | #readability-logo,#arc90-logo,.footer-twitterLink,#readTools a,a.rdbTK-powered span{background-color:transparent!important;background-image:url(http://lab.arc90.com/experiments/readability/images/sprite-readability.png)!important;background-repeat:no-repeat!important;} 4 | #readOverlay {display:block;position:absolute;top:0;left:0;width:100%;} 5 | #readInner {line-height:1.4em;max-width:800px;margin:1em auto;} 6 | #readInner a {color:#039;text-decoration:none;} 7 | #readInner a:hover {text-decoration:underline;} 8 | #readInner img {float:left;clear:both;margin: 0 12px 12px 0;} 9 | #readInner h1 {display:block;width:100%;border-bottom:1px solid #333;font-size:1.2em;padding-bottom:.5em;margin-top:0;margin-bottom:.75em;} 10 | #readInner sup{line-height:.8em;} 11 | #readInner 
.page-separator{clear:both;display:block;font-size:.85em;filter:alpha(opacity=20);opacity:.20;text-align:center;} 12 | .style-apertura #readInner h1 {border-bottom-color:#ededed;} 13 | #readInner blockquote {margin-left:3em;margin-right:3em;} 14 | #readability-inner * {margin-bottom:16px;border:none;background:none;} 15 | /* Footer */ 16 | #readFooter {display:block;border-top:1px solid #333;text-align:center;clear:both;overflow:hidden;} 17 | .style-apertura #readFooter {border-top-color:#ededed;} 18 | #rdb-footer-left {display:inline;float:left;margin-top:15px;width:285px;background-position:0 -36px;} 19 | .rdbTypekit #rdb-footer-left {width:475px;} 20 | #rdb-footer-left a,#rdb-footer-left a:link {float:left;} 21 | #readability-logo {display:inline;background-position:0 -36px;height:29px;width:189px;text-indent:-9000px;} 22 | #arc90-logo {display:inline;background-position:right -36px;height:29px;width:96px;text-indent:-9000px;} 23 | #readability-url {display:none;} 24 | .style-apertura #readability-logo {background-position:0 -67px;} 25 | .style-apertura #arc90-logo {background-position:right -67px;} 26 | #rdb-footer-right {display:inline;float:right;text-align:right;font-size:.75em;margin-top:18px;} 27 | #rdb-footer-right a {display:inline-block;float:left;overflow:visible;line-height:16px;vertical-align:baseline;} 28 | .footer-twitterLink {height:20px;margin-left:20px;padding:4px 0 0 28px;background-position:0 -123px;font-size:12px;} 29 | #rdb-footer-left .footer-twitterLink {display:none;margin-top:1px;padding-top:2px;} 30 | .rdbTypekit #rdb-footer-right .footer-twitterLink {display:none;} 31 | .rdbTypekit #rdb-footer-left .footer-twitterLink {display:inline-block!important;} 32 | a.rdbTK-powered,a.rdbTK-powered:link,a.rdbTK-powered:hover {font-size:16px;color:#858789!important;text-decoration:none!important;} 33 | a.rdbTK-powered span {display:inline-block;height:22px;margin-left:2px;padding:4px 0 0 26px;background-position:0 -146px!important;} 34 | 
.style-apertura #rdb-inverse,.style-athelas #rdb-athelas {display:block;} 35 | span.version {display:none;} 36 | /* Tools */ 37 | #readTools {width:34px;height:150px;position:fixed;z-index:100;top:10px;left:10px;} 38 | #readTools a {overflow:hidden;margin-bottom:8px;display:block;opacity:.4;text-indent:-99999px;height:34px;width:34px;text-decoration:none;filter:alpha(opacity=40);} 39 | #reload-page {background-position:0 0;} 40 | #print-page {background-position:-36px 0;} 41 | #email-page {background-position:-72px 0;} 42 | #kindle-page {background-position:-108px 0;} 43 | #readTools a:hover {opacity:1;filter:alpha(opacity=100);} 44 | /* -- USER-CONFIGURABLE STYLING -- */ 45 | /* Size */ 46 | .size-x-small {font-size:.75em;} 47 | .size-small {font-size:.938em;} 48 | .size-medium {font-size:1.125em;} 49 | .size-large {font-size:1.375em;} 50 | .size-x-large {font-size:1.75em;} 51 | /* Style */ 52 | .style-newspaper {font-family:"Times New Roman", Times, serif;background:#fbfbfb;color:#080000;} 53 | .style-newspaper h1 {text-transform:capitalize;font-family:Georgia, "Times New Roman", Times, serif;} 54 | .style-newspaper #readInner a {color:#0924e1;} 55 | .style-novel {font-family:"Palatino Linotype", "Book Antiqua", Palatino, serif;background:#f4eed9;color:#1d1916;} 56 | .style-novel #readInner a {color:#1856ba;} 57 | .style-ebook {font-family:Arial, Helvetica, sans-serif;background:#edebe8;color:#2c2d32;} 58 | .style-ebook #readInner a {color:#187dc9;} 59 | .style-ebook h1 {font-family:"Arial Black", Gadget, sans-serif;font-weight:400;} 60 | .style-terminal {font-family:"Lucida Console", Monaco, monospace;background:#1d4e2c;color:#c6ffc6;} 61 | .style-terminal #readInner a {color:#093;} 62 | /* Typekit */ 63 | .style-apertura {font-family:"apertura-1", "apertura-2", sans-serif;background-color:#2d2828;color:#eae8e9;} 64 | .style-apertura #readInner a {color:#58b0ff;} 65 | .style-athelas {font-family:"athelas-1", "athelas-2", "Palatino Linotype", "Book Antiqua", 
Palatino, serif;background-color:#f7f7f7;color:#2b373d;} 66 | .style-athelas #readInner a {color:#1e83cb;} 67 | /* Margin */ 68 | .margin-x-narrow {width:95%;} 69 | .margin-narrow {width:85%;} 70 | .margin-medium {width:75%;} 71 | .margin-wide {width:55%;} 72 | .margin-x-wide {width:35%;} 73 | /* -- USER-CONFIGURABLE STYLING -- */ 74 | /* -- DEBUG -- */ 75 | .bug-green {background:#bbf9b0;border:4px solid green;} 76 | .bug-red {background:red;} 77 | .bug-yellow {background:#ffff8e;} 78 | .bug-blue {background:#bfdfff;} 79 | /* -- EMAIL / KINDLE POP UP -- */ 80 | #kindle-container, #email-container {position:fixed;top:60px;left:50%;width:500px;height:490px;border:solid 3px #666;background-color:#fff;z-index:100!important;overflow:hidden;margin:0 0 0 -240px;padding:0;} 81 | /* Override html styling attributes */ 82 | table, tr, td { background-color: transparent !important; } 83 | -------------------------------------------------------------------------------- /PortablePorts/NReadability/SgmlDomBuilder.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * NReadability 3 | * http://code.google.com/p/nreadability/ 4 | * 5 | * Copyright 2010 Marek Stój 6 | * http://immortal.pl/ 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | using System; 22 | using System.Text; 23 | using System.Text.RegularExpressions; 24 | using System.Xml; 25 | using System.Xml.Linq; 26 | using ReadSharp.Ports.Sgml; 27 | using System.IO; 28 | 29 | namespace ReadSharp.Ports.NReadability 30 | { 31 | /// 32 | /// A class for constructing a DOM from HTML markup. 33 | /// 34 | public class SgmlDomBuilder 35 | { 36 | #region Public methods 37 | 38 | /// 39 | /// Constructs a DOM (System.Xml.Linq.XDocument) from HTML markup. 40 | /// 41 | /// HTML markup from which the DOM is to be constructed. 42 | /// System.Linq.Xml.XDocument instance which is a DOM of the provided HTML markup. 43 | public XDocument BuildDocument(string htmlContent) 44 | { 45 | if (htmlContent == null) 46 | { 47 | throw new ArgumentNullException("htmlContent"); 48 | } 49 | 50 | if (htmlContent.Trim().Length == 0) 51 | { 52 | return new XDocument(); 53 | } 54 | 55 | // "trim end" htmlContent to ...$ (codinghorror.com puts some scripts after the - sic!) 56 | const string htmlEnd = "', indexOfHtmlEnd); 62 | 63 | if (indexOfHtmlEndBracket != -1) 64 | { 65 | htmlContent = htmlContent.Substring(0, indexOfHtmlEndBracket + 1); 66 | } 67 | } 68 | 69 | XDocument document; 70 | 71 | try 72 | { 73 | document = LoadDocument(htmlContent); 74 | } 75 | catch (InvalidOperationException exc) 76 | { 77 | // sometimes SgmlReader doesn't handle