├── Interop ├── .gitignore ├── TidyEOFFunc.cs ├── TidyGetByteFunc.cs ├── TidyPutByteFunc.cs ├── TidyUngetByteFunc.cs ├── TidyOutputSink.cs ├── TidyInputSource.cs ├── PInvoke.cs └── TidyOptionId.cs ├── .gitignore ├── TidyManaged.sln ├── SortStrategy.cs ├── RepeatedAttributeMode.cs ├── AutoBool.cs ├── OutputSink.cs ├── NewlineType.cs ├── AccessibilityCheckLevel.cs ├── DocTypeMode.cs ├── InputSource.cs ├── AssemblyInfo.cs ├── TidyManaged.csproj ├── EncodingType.cs ├── README.md └── Document.cs /Interop/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | bin 3 | *.userprefs 4 | *.pidb 5 | TestHarness 6 | -------------------------------------------------------------------------------- /TidyManaged.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 10.00 3 | # Visual Studio 2008 4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TidyManaged", "TidyManaged.csproj", "{D799633D-00EF-437C-B158-315557D930FC}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Any CPU = Debug|Any CPU 9 | Release|Any CPU = Release|Any CPU 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {D799633D-00EF-437C-B158-315557D930FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 13 | {D799633D-00EF-437C-B158-315557D930FC}.Debug|Any CPU.Build.0 = Debug|Any CPU 14 | {D799633D-00EF-437C-B158-315557D930FC}.Release|Any CPU.ActiveCfg = Release|Any CPU 15 | {D799633D-00EF-437C-B158-315557D930FC}.Release|Any CPU.Build.0 = Release|Any CPU 16 | EndGlobalSection 17 | GlobalSection(MonoDevelopProperties) = preSolution 18 | StartupItem = TidyManaged.csproj 19 | EndGlobalSection 20 | 
GlobalSection(SolutionProperties) = preSolution 21 | HideSolutionNode = FALSE 22 | EndGlobalSection 23 | EndGlobal 24 | -------------------------------------------------------------------------------- /Interop/TidyEOFFunc.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009 Mark Beaton 2 | // 3 | // Permission is hereby granted, free of charge, to any person 4 | // obtaining a copy of this software and associated documentation 5 | // files (the "Software"), to deal in the Software without 6 | // restriction, including without limitation the rights to use, 7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the 9 | // Software is furnished to do so, subject to the following 10 | // conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be 13 | // included in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | // OTHER DEALINGS IN THE SOFTWARE. 
using System;

namespace TidyManaged.Interop
{
    /// <summary>
    /// Signature of the end-of-input callback. <see cref="TidyInputSource"/> stores a
    /// delegate of this type in its <c>eof</c> field, marshalled as a function pointer.
    /// </summary>
    /// <param name="sinkData">
    /// Opaque pointer supplied by the caller of the callback (presumably the data
    /// pointer held by the input-source struct - confirm against the native header).
    /// </param>
    /// <returns>A boolean produced by the callback implementation.</returns>
    internal delegate bool TidyEOFFunc(IntPtr sinkData);
}
using System;

namespace TidyManaged.Interop
{
    /// <summary>
    /// Signature of the read-one-byte callback. <see cref="TidyInputSource"/> stores a
    /// delegate of this type in its <c>getByte</c> field, marshalled as a function pointer.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the return type is <see cref="byte"/>, so an end-of-stream marker
    /// such as -1 cannot be represented; confirm the width expected by the native typedef.
    /// </remarks>
    /// <param name="sinkData">Opaque pointer supplied by the caller of the callback.</param>
    /// <returns>The byte produced by the callback implementation.</returns>
    internal delegate byte TidyGetByteFunc(IntPtr sinkData);
}
using System;

namespace TidyManaged.Interop
{
    /// <summary>
    /// Signature of the write-one-byte callback. <see cref="TidyOutputSink"/> stores a
    /// delegate of this type in its <c>putByte</c> field, marshalled as a function pointer.
    /// </summary>
    /// <param name="sinkData">Opaque pointer supplied by the caller of the callback.</param>
    /// <param name="bt">The byte handed to the callback.</param>
    internal delegate void TidyPutByteFunc(IntPtr sinkData, byte bt);
}
using System;

// File: Interop/TidyUngetByteFunc.cs
namespace TidyManaged.Interop
{
    /// <summary>
    /// Signature of the push-back callback. <see cref="TidyInputSource"/> stores a
    /// delegate of this type in its <c>ungetByte</c> field, marshalled as a function pointer.
    /// </summary>
    /// <param name="sinkData">Opaque pointer supplied by the caller of the callback.</param>
    /// <param name="bt">The byte handed to the callback.</param>
    internal delegate void TidyUngetByteFunc(IntPtr sinkData, byte bt);
}

// File: SortStrategy.cs
namespace TidyManaged
{
    /// <summary>
    /// Represents values used by some Tidy properties.
    /// </summary>
    /// <remarks>
    /// The numeric values are written out explicitly; they equal the values the
    /// compiler assigns by default.
    /// </remarks>
    public enum SortStrategy
    {
        /// <summary>
        /// None.
        /// </summary>
        None = 0,

        /// <summary>
        /// Alpha.
        /// </summary>
        Alpha = 1
    }
}

// File: RepeatedAttributeMode.cs
namespace TidyManaged
{
    /// <summary>
    /// Represents the available repeated-attribute handling modes.
    /// </summary>
    public enum RepeatedAttributeMode
    {
        /// <summary>
        /// Keep the first attribute.
        /// </summary>
        KeepFirst = 0,

        /// <summary>
        /// Keep the last attribute.
        /// </summary>
        KeepLast = 1
    }
}

// File: AutoBool.cs
namespace TidyManaged
{
    /// <summary>
    /// Represents yes/no/auto values used by some Tidy properties.
    /// </summary>
    public enum AutoBool
    {
        /// <summary>
        /// No.
        /// </summary>
        No = 0,

        /// <summary>
        /// Yes.
        /// </summary>
        Yes = 1,

        /// <summary>
        /// Automatic.
        /// </summary>
        Auto = 2
    }
}
using System;
using System.Runtime.InteropServices;

namespace TidyManaged.Interop
{
    /// <summary>
    /// Struct passed by reference to the native <c>tidySaveSink</c> import: a data
    /// pointer followed by a write-one-byte callback.
    /// </summary>
    /// <remarks>
    /// Sequential layout is what the C# compiler applies to structs by default; it is
    /// stated explicitly here because the field order is what gets marshalled.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    internal struct TidyOutputSink
    {
        // The fields are written here but never read from managed code, which would
        // otherwise raise compiler warning CS0414.
#pragma warning disable 0414
        IntPtr sinkData;

        [MarshalAs(UnmanagedType.FunctionPtr)]
        TidyPutByteFunc putByte;
#pragma warning restore 0414

        /// <summary>
        /// Creates a sink that uses the given callback and a null data pointer.
        /// </summary>
        /// <param name="putByte">Callback stored in the struct's function-pointer slot.</param>
        internal TidyOutputSink(TidyPutByteFunc putByte)
        {
            this.putByte = putByte;
            this.sinkData = IntPtr.Zero;
        }
    }
}
using System;
using System.IO;

// File: OutputSink.cs
namespace TidyManaged
{
    /// <summary>
    /// Adapts a managed <see cref="Stream"/> to an <see cref="Interop.TidyOutputSink"/>
    /// by writing every byte received through the callback to the stream.
    /// </summary>
    internal class OutputSink
    {
        /// <summary>
        /// Wraps <paramref name="stream"/> and builds the interop struct whose
        /// callback points at this instance.
        /// </summary>
        /// <param name="stream">Destination stream; must be non-null and writable.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="stream"/> is not writable.</exception>
        internal OutputSink(Stream stream)
        {
            // Fail here, in managed code, rather than later inside the callback
            // that is invoked through a native function pointer.
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }
            if (!stream.CanWrite)
            {
                throw new ArgumentException("The stream must be writable.", "stream");
            }

            this.stream = stream;
            this.TidyOutputSink = new Interop.TidyOutputSink(new Interop.TidyPutByteFunc(OnPutByte));
        }

        readonly Stream stream;

        /// <summary>
        /// Interop struct holding the callback delegate. Because the delegate is stored
        /// in this field, it stays referenced for as long as this instance does.
        /// </summary>
        internal Interop.TidyOutputSink TidyOutputSink;

        /// <summary>
        /// Callback target: appends <paramref name="bt"/> to the wrapped stream.
        /// <paramref name="sinkData"/> is ignored.
        /// </summary>
        void OnPutByte(IntPtr sinkData, byte bt)
        {
            this.stream.WriteByte(bt);
        }
    }
}

// File: NewlineType.cs
namespace TidyManaged
{
    /// <summary>
    /// Represents the available newline types.
    /// </summary>
    /// <remarks>
    /// The numeric values are written out explicitly; they equal the values the
    /// compiler assigns by default.
    /// </remarks>
    public enum NewlineType
    {
        /// <summary>
        /// LF (used by Mac OS X, Unix, Linux).
        /// </summary>
        Linefeed = 0,

        /// <summary>
        /// CRLF (used by Microsoft Windows, DOS etc).
        /// </summary>
        CarriageReturnLinefeed = 1,

        /// <summary>
        /// CR (used by Mac OS 9 and earlier).
        /// </summary>
        CarriageReturn = 2
    }
}

// File: AccessibilityCheckLevel.cs
namespace TidyManaged
{
    /// <summary>
    /// Represents the available accessibility check levels.
    /// </summary>
    public enum AccessibilityCheckLevel
    {
        /// <summary>
        /// Equivalent to Tidy Classic's accessibility checking.
        /// </summary>
        TidyClassic = 0,

        /// <summary>
        /// Priority 1.
        /// </summary>
        Priority1 = 1,

        /// <summary>
        /// Priority 2.
        /// </summary>
        Priority2 = 2,

        /// <summary>
        /// Priority 3.
        /// </summary>
        Priority3 = 3
    }
}

// File: DocTypeMode.cs
namespace TidyManaged
{
    /// <summary>
    /// Represents the available DOCTYPE modes.
    /// </summary>
    /// <remarks>
    /// The numeric values are written out explicitly; they equal the values the
    /// compiler assigns by default.
    /// </remarks>
    public enum DocTypeMode
    {
        /// <summary>
        /// Omit DOCTYPE altogether.
        /// </summary>
        Omit = 0,

        /// <summary>
        /// Keep DOCTYPE in input. Set version to content.
        /// </summary>
        Auto = 1,

        /// <summary>
        /// Convert document to strict content model.
        /// </summary>
        Strict = 2,

        /// <summary>
        /// Convert document to transitional content model.
        /// </summary>
        Loose = 3,

        /// <summary>
        /// User-specified doctype.
        /// </summary>
        User = 4
    }
}
using System;
using System.Runtime.InteropServices;

namespace TidyManaged.Interop
{
    /// <summary>
    /// Struct passed by reference to the native <c>tidyParseSource</c> import: a data
    /// pointer followed by the get-byte, unget-byte and end-of-input callbacks.
    /// </summary>
    /// <remarks>
    /// Sequential layout is what the C# compiler applies to structs by default; it is
    /// stated explicitly here because the field order is what gets marshalled.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    internal struct TidyInputSource
    {
        // The fields are written here but never read from managed code, which would
        // otherwise raise compiler warning CS0414.
#pragma warning disable 0414
        IntPtr sourceData;

        [MarshalAs(UnmanagedType.FunctionPtr)]
        TidyGetByteFunc getByte;

        [MarshalAs(UnmanagedType.FunctionPtr)]
        TidyUngetByteFunc ungetByte;

        [MarshalAs(UnmanagedType.FunctionPtr)]
        TidyEOFFunc eof;
#pragma warning restore 0414

        /// <summary>
        /// Creates a source that uses the given callbacks and a null data pointer.
        /// </summary>
        /// <param name="getByte">Callback stored in the get-byte slot.</param>
        /// <param name="ungetByte">Callback stored in the unget-byte slot.</param>
        /// <param name="eof">Callback stored in the end-of-input slot.</param>
        internal TidyInputSource(TidyGetByteFunc getByte, TidyUngetByteFunc ungetByte, TidyEOFFunc eof)
        {
            this.eof = eof;
            this.ungetByte = ungetByte;
            this.getByte = getByte;
            this.sourceData = IntPtr.Zero;
        }
    }
}
using System;
using System.IO;

namespace TidyManaged
{
    /// <summary>
    /// Adapts a managed <see cref="Stream"/> to an <see cref="Interop.TidyInputSource"/>
    /// by serving its get-byte, unget-byte and end-of-input callbacks from the stream.
    /// </summary>
    internal class InputSource
    {
        /// <summary>
        /// Wraps <paramref name="stream"/> and builds the interop struct whose
        /// callbacks point at this instance.
        /// </summary>
        /// <param name="stream">
        /// Source stream; must be non-null, readable and seekable. Seeking is required
        /// because the callbacks use <see cref="Stream.Position"/> and
        /// <see cref="Stream.Length"/>.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="stream"/> is not readable or not seekable.
        /// </exception>
        internal InputSource(Stream stream)
        {
            // Fail here, in managed code, rather than later inside a callback
            // that is invoked through a native function pointer.
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }
            if (!stream.CanRead)
            {
                throw new ArgumentException("The stream must be readable.", "stream");
            }
            if (!stream.CanSeek)
            {
                throw new ArgumentException("The stream must support seeking.", "stream");
            }

            this.stream = stream;
            this.TidyInputSource = new Interop.TidyInputSource(
                new Interop.TidyGetByteFunc(OnGetByte),
                new Interop.TidyUngetByteFunc(OnUngetByte),
                new Interop.TidyEOFFunc(OnEOF));
        }

        readonly Stream stream;

        /// <summary>
        /// Interop struct holding the callback delegates. Because the delegates are stored
        /// in this field, they stay referenced for as long as this instance does.
        /// </summary>
        internal Interop.TidyInputSource TidyInputSource;

        /// <summary>
        /// Callback target: reads the next byte from the wrapped stream.
        /// <paramref name="sinkData"/> is ignored.
        /// </summary>
        /// <returns>
        /// The byte read. At end of stream <see cref="Stream.ReadByte"/> yields -1,
        /// which this narrowing cast turns into 0xFF.
        /// </returns>
        byte OnGetByte(IntPtr sinkData)
        {
            // Explicitly unchecked so that the end-of-stream value (-1) wraps to 0xFF even
            // when the assembly is compiled with overflow checking enabled, instead of
            // throwing OverflowException out of the callback.
            return unchecked((byte) this.stream.ReadByte());
        }

        /// <summary>
        /// Callback target: steps the stream position back by one byte when it is not
        /// already at the start. Both arguments are ignored; the byte is not written back.
        /// </summary>
        void OnUngetByte(IntPtr sinkData, byte bt)
        {
            if (this.stream.Position > 0)
            {
                this.stream.Position--;
            }
        }

        /// <summary>
        /// Callback target: reports whether the stream position has reached (or passed)
        /// the stream length. <paramref name="sinkData"/> is ignored.
        /// </summary>
        bool OnEOF(IntPtr sinkData)
        {
            return this.stream.Position >= this.stream.Length;
        }
    }
}
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | // OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | using System.Reflection; 25 | using System.Runtime.CompilerServices; 26 | using System.Runtime.InteropServices; 27 | 28 | [assembly: AssemblyTitle("TidyManaged")] 29 | [assembly: AssemblyDescription("Managed .NET wrapper for the HTML Tidy library")] 30 | [assembly: AssemblyConfiguration("")] 31 | [assembly: AssemblyCompany("Mark Beaton")] 32 | [assembly: AssemblyProduct("TidyManaged")] 33 | [assembly: AssemblyCopyright("Copyright © Mark Beaton 2009")] 34 | [assembly: AssemblyTrademark("")] 35 | [assembly: AssemblyCulture("")] 36 | 37 | // Setting ComVisible to false makes the types in this assembly not visible 38 | // to COM components. If you need to access a type in this assembly from 39 | // COM, set the ComVisible attribute to true on that type. 
40 | [assembly: ComVisible(false)] 41 | 42 | // The following GUID is for the ID of the typelib if this project is exposed to COM 43 | [assembly: Guid("1c09c222-dbe1-44b3-8983-d4116ec3e051")] 44 | 45 | // Version information for an assembly consists of the following four values: 46 | // 47 | // Major Version 48 | // Minor Version 49 | // Build Number 50 | // Revision 51 | // 52 | // You can specify all the values or you can default the Revision and Build Numbers 53 | // by using the '*' as shown below: 54 | [assembly: AssemblyVersion("1.0.0.0")] 55 | [assembly: AssemblyFileVersion("1.0.0.0")] 56 | -------------------------------------------------------------------------------- /TidyManaged.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Debug 5 | AnyCPU 6 | 9.0.30729 7 | 2.0 8 | {D799633D-00EF-437C-B158-315557D930FC} 9 | Library 10 | Properties 11 | TidyManaged 12 | TidyManaged 13 | v2.0 14 | 512 15 | 16 | 17 | true 18 | full 19 | false 20 | bin\Debug\ 21 | TRACE;DEBUG;SUPPORT_UTF16_ENCODINGS;SUPPORT_ASIAN_ENCODINGS; 22 | prompt 23 | 4 24 | 25 | 26 | none 27 | true 28 | bin\Release\ 29 | TRACE;SUPPORT_UTF16_ENCODINGS;SUPPORT_ASIAN_ENCODINGS; 30 | prompt 31 | 4 32 | bin\Release\TidyManaged.xml 33 | true 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 69 | -------------------------------------------------------------------------------- /Interop/PInvoke.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009 Mark Beaton 2 | // 3 | // Permission is hereby granted, free of charge, to any person 4 | // obtaining a copy of this software and associated documentation 5 | // files (the "Software"), to deal in the Software without 6 | // restriction, including without limitation the rights to use, 7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell 8 
using System;
using System.Runtime.InteropServices;

namespace TidyManaged.Interop
{
    /// <summary>
    /// Declarations of the native entry points imported from <c>libtidy.dll</c>,
    /// plus one managed convenience wrapper.
    /// </summary>
    internal class PInvoke
    {
        // Single place for the native library name used by every import below.
        const string LibraryName = "libtidy.dll";

        // Imports that take no document handle, or only the handle.

        [DllImport(LibraryName)]
        internal static extern IntPtr tidyCreate();

        [DllImport(LibraryName)]
        internal static extern void tidyRelease(IntPtr tdoc);

        [DllImport(LibraryName)]
        internal static extern IntPtr tidyReleaseDate();

        // Imports keyed by a TidyOptionId.

        [DllImport(LibraryName)]
        internal static extern IntPtr tidyOptGetValue(IntPtr tdoc, TidyOptionId optId);

        [DllImport(LibraryName)]
        internal static extern bool tidyOptSetValue(IntPtr tdoc, TidyOptionId optId, string val);

        [DllImport(LibraryName)]
        internal static extern uint tidyOptGetInt(IntPtr tdoc, TidyOptionId optId);

        [DllImport(LibraryName)]
        internal static extern bool tidyOptSetInt(IntPtr tdoc, TidyOptionId optId, uint val);

        [DllImport(LibraryName)]
        internal static extern bool tidyOptGetBool(IntPtr tdoc, TidyOptionId optId);

        [DllImport(LibraryName)]
        internal static extern bool tidyOptSetBool(IntPtr tdoc, TidyOptionId optId, bool val);

        // Imports named tidyParse* / tidyCleanAndRepair.

        [DllImport(LibraryName)]
        internal static extern int tidyParseFile(IntPtr tdoc, string filename);

        [DllImport(LibraryName)]
        internal static extern int tidyParseString(IntPtr tdoc, string content);

        [DllImport(LibraryName)]
        internal static extern int tidyParseSource(IntPtr tdoc, ref TidyInputSource source);

        [DllImport(LibraryName)]
        internal static extern int tidyCleanAndRepair(IntPtr tdoc);

        // Imports named tidySave*.

        [DllImport(LibraryName)]
        internal static extern int tidySaveFile(IntPtr tdoc, string filname);

        [DllImport(LibraryName)]
        internal static extern int tidySaveString(IntPtr tdoc, IntPtr buffer, ref uint buflen);

        [DllImport(LibraryName)]
        internal static extern int tidySaveSink(IntPtr tdoc, ref TidyOutputSink sink);

        /// <summary>
        /// Calls <see cref="tidyOptGetValue"/> and converts the returned pointer to a
        /// managed string with <see cref="Marshal.PtrToStringAnsi(IntPtr)"/>.
        /// </summary>
        /// <param name="tdoc">Document handle forwarded to the native call.</param>
        /// <param name="optId">Option identifier forwarded to the native call.</param>
        /// <returns>The converted string.</returns>
        internal static string tidyOptGetValueString(IntPtr tdoc, TidyOptionId optId)
        {
            IntPtr nativeValue = tidyOptGetValue(tdoc, optId);
            return Marshal.PtrToStringAnsi(nativeValue);
        }
    }
}
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

namespace TidyManaged
{
	/// <summary>
	/// Represents the supported encodings.
	/// </summary>
	// The numeric values are explicit. Big5 and ShiftJIS take 12/13 when
	// SUPPORT_UTF16_ENCODINGS is defined and 9/10 otherwise, so that the values
	// stay contiguous in either build configuration.
	public enum EncodingType
	{
		/// <summary>
		/// No or unknown encoding.
		/// </summary>
		Raw = 0,

		/// <summary>
		/// The American Standard Code for Information Interchange (ASCII) encoding scheme.
		/// </summary>
		Ascii = 1,

		/// <summary>
		/// The ISO/IEC 8859-15 encoding scheme, also known as Latin-0 and Latin-9.
		/// </summary>
		Latin0 = 2,

		/// <summary>
		/// The ISO/IEC 8859-1 encoding scheme, also known as Latin-1.
		/// </summary>
		Latin1 = 3,

		/// <summary>
		/// The UTF-8 encoding scheme.
		/// </summary>
		Utf8 = 4,

		/// <summary>
		/// The ISO/IEC 2022 encoding scheme.
		/// </summary>
		Iso2022 = 5,

		/// <summary>
		/// The MacRoman encoding scheme.
		/// </summary>
		MacRoman = 6,

		/// <summary>
		/// The Windows-1252 encoding scheme.
		/// </summary>
		Win1252 = 7,

		/// <summary>
		/// The Code page 858 encoding scheme, also known as CP 858, IBM 858, or OEM 858.
		/// </summary>
		Ibm858 = 8,

#if SUPPORT_UTF16_ENCODINGS

		/// <summary>
		/// The UTF-16LE (Little Endian) encoding scheme.
		/// </summary>
		Utf16LittleEndian = 9,

		/// <summary>
		/// The UTF-16BE (Big Endian) encoding scheme.
		/// </summary>
		Utf16BigEndian = 10,

		/// <summary>
		/// The UTF-16 encoding scheme, with endianness detected using a BOM.
		/// </summary>
		Utf16 = 11,

#endif

#if SUPPORT_ASIAN_ENCODINGS
#if SUPPORT_UTF16_ENCODINGS

		/// <summary>
		/// The Big-5 or Big5 encoding scheme, used in Taiwan, Hong Kong, and Macau for Traditional Chinese characters.
		/// </summary>
		Big5 = 12,

		/// <summary>
		/// The Shift JIS encoding scheme for Japanese characters.
		/// </summary>
		ShiftJIS = 13

#else

		/// <summary>
		/// The Big-5 or Big5 encoding scheme, used in Taiwan, Hong Kong, and Macau for Traditional Chinese characters.
		/// </summary>
		Big5 = 9,

		/// <summary>
		/// The Shift JIS encoding scheme for Japanese characters.
		/// </summary>
		ShiftJIS = 10

#endif
#endif
	}
}
 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TidyManaged 2 | 3 | This is a managed .NET/Mono wrapper for the open source, cross-platform Tidy library, a HTML/XHTML/XML markup parser & cleaner originally created by Dave Raggett. 4 | 5 | I'm not going to explain Tidy's "raison d'être" - please read [Dave Raggett's original web page](http://www.w3.org/People/Raggett/tidy/) for more information, or the [SourceForge project](http://tidy.sourceforge.net/) that has taken over maintenance of the library. 6 | 7 | ## libtidy 8 | 9 | This wrapper is written in C#, and makes use of .NET platform invoke (p/invoke) functionality to interoperate with the Tidy library "libtidy" (written in portable ANSI C). 10 | 11 | Therefore, you'll also need a build of the binary appropriate for your platform.
If you're after a 32- or 64-bit Windows build, or you want a more recent build for Mac OS X than the one that is bundled with the OS, try these:

- [Windows 32-bit build](http://wemakeapps.net/downloads/TidyManaged/libtidy.dll.Win32.zip)
- [Windows 64-bit build](http://wemakeapps.net/downloads/TidyManaged/libtidy.dll.Win64.zip)
- [Mac x64/x86/PPC fat binary](http://wemakeapps.net/downloads/TidyManaged/libtidy.dylib.zip) - this is a newer build (25 March 2009) than the version included in default OS X installations.

Otherwise, grab the latest source from the [SourceForge project](http://tidy.sourceforge.net/), and roll your own.

## Sample Usage

Here's a quick'n'dirty example using a simple console app.
Note: always remember to call .Dispose() on your Document instance (or wrap it in a "using" statement), so the interop layer can clean up any unmanaged resources (memory, file handles etc) when it's done cleaning.

    using System;
    using TidyManaged;

    public class Test
    {
      public static void Main(string[] args)
      {
        using (Document doc = Document.FromString("<hTml><title>test</tootle><body>asd</body>"))
        {
          doc.ShowWarnings = false;
          doc.Quiet = true;
          doc.OutputXhtml = true;
          doc.CleanAndRepair();
          string parsed = doc.Save();
          Console.WriteLine(parsed);
        }
      }
    }

results in:

    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml">
    <head>
    <meta name="generator" content=
    "HTML Tidy for Mac OS X (vers 31 October 2006 - Apple Inc. build 13), see www.w3.org" />
    <title>test</title>
    </head>
    <body>
    asd
    </body>
    </html>

## Notes for non-Windows platforms

Thanks to the platform-agnostic nature of ANSI C, and the excellent work of the people at the [Mono Project](http://www.mono-project.com/), you can use this wrapper library anywhere that Mono is supported, assuming you have (or can build) a version of the underlying Tidy library for your platform. That shouldn't be too hard - it's a default part of a standard Mac OS X install, for example; it probably is for most Linux distributions as well.

Under Mono, you might need to re-map the p/invoke calls to the appropriate library - or you might find it just works. See [this page on DLL mapping](http://www.mono-project.com/Config_DllMap) for more information on achieving this. Note: the .config file needs to be configured for the TidyManaged DLL, NOT your application's binary.

### Example TidyManaged.dll.config

    <configuration>
      <dllmap dll="libtidy.dll" target="libtidy.dylib"/>
    </configuration>

## The API

At this stage I've just created a basic mapping of each of the configuration options made available by Tidy to properties of the main Document object - I've renamed a few things here & there, but it should be pretty easy to figure out what each property does (the documentation included in the code includes the original Tidy option name for each property). You can read the [Tidy configuration documentation here](http://tidy.sourceforge.net/docs/quickref.html).

## The Future

At some point I'll add a nicer ".NET-style" API layer over the top, as it's a bit clunky (although perfectly usable) at the moment.
 77 | -------------------------------------------------------------------------------- /Interop/TidyOptionId.cs: --------------------------------------------------------------------------------
// Copyright (c) 2009 Mark Beaton
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

using System;

namespace TidyManaged.Interop
{
	// Identifies a Tidy configuration option. Values of this enum are passed as
	// the optId argument of the native tidyOpt* functions declared in PInvoke.
	//
	// The members have no explicit values, so each one's number is its position
	// in this list. Do not reorder, insert or remove members. The *NotUsed
	// placeholders in the #else branches exist so that later members keep the
	// same number whether or not SUPPORT_ASIAN_ENCODINGS /
	// SUPPORT_UTF16_ENCODINGS are defined.
	// NOTE(review): the order presumably mirrors the TidyOptionId enum in the
	// native library's header - confirm against the libtidy build in use.
	internal enum TidyOptionId
	{
		TidyUnknownOption,   /*< Unknown option! */
		TidyIndentSpaces,    /*< Indentation n spaces */
		TidyWrapLen,         /*< Wrap margin */
		TidyTabSize,         /*< Expand tabs to n spaces */
		TidyCharEncoding,    /*< In/out character encoding */
		TidyInCharEncoding,  /*< Input character encoding (if different) */
		TidyOutCharEncoding, /*< Output character encoding (if different) */
		TidyNewline,         /*< Output line ending (default to platform) */
		TidyDoctypeMode,     /*< See doctype property */
		TidyDoctype,         /*< User specified doctype */
		TidyDuplicateAttrs,  /*< Keep first or last duplicate attribute */
		TidyAltText,         /*< Default text for alt attribute */

		[Obsolete]
		TidySlideStyle,      /*< Style sheet for slides: not used for anything yet */

		TidyErrFile,         /*< File name to write errors to */
		TidyOutFile,         /*< File name to write markup to */
		TidyWriteBack,       /*< If true then output tidied markup */
		TidyShowMarkup,      /*< If false, normal output is suppressed */
		TidyShowWarnings,    /*< However errors are always shown */
		TidyQuiet,           /*< No 'Parsing X', guessed DTD or summary */
		TidyIndentContent,   /*< Indent content of appropriate tags */
		                     /*< "auto" does text/block level content indentation */
		TidyHideEndTags,     /*< Suppress optional end tags */
		TidyXmlTags,         /*< Treat input as XML */
		TidyXmlOut,          /*< Create output as XML */
		TidyXhtmlOut,        /*< Output extensible HTML */
		TidyHtmlOut,         /*< Output plain HTML, even for XHTML input.
		                         Yes means set explicitly. */
		TidyXmlDecl,         /*< Add <?xml?> for XML docs */
		TidyUpperCaseTags,   /*< Output tags in upper not lower case */
		TidyUpperCaseAttrs,  /*< Output attributes in upper not lower case */
		TidyMakeBare,        /*< Make bare HTML: remove Microsoft cruft */
		TidyMakeClean,       /*< Replace presentational clutter by style rules */
		TidyLogicalEmphasis, /*< Replace i by em and b by strong */
		TidyDropPropAttrs,   /*< Discard proprietary attributes */
		TidyDropFontTags,    /*< Discard presentation tags */
		TidyDropEmptyParas,  /*< Discard empty p elements */
		TidyFixComments,     /*< Fix comments with adjacent hyphens */
		TidyBreakBeforeBR,   /*< Output newline before <br> or not? */

		[Obsolete]
		TidyBurstSlides,     /*< Create slides on each h2 element */

		TidyNumEntities,     /*< Use numeric entities */
		TidyQuoteMarks,      /*< Output " marks as &quot; */
		TidyQuoteNbsp,       /*< Output non-breaking space as entity */
		TidyQuoteAmpersand,  /*< Output naked ampersand as &amp; */
		TidyWrapAttVals,     /*< Wrap within attribute values */
		TidyWrapScriptlets,  /*< Wrap within JavaScript string literals */
		TidyWrapSection,     /*< Wrap within <![ ... ]> section tags */
		TidyWrapAsp,         /*< Wrap within ASP pseudo elements */
		TidyWrapJste,        /*< Wrap within JSTE pseudo elements */
		TidyWrapPhp,         /*< Wrap within PHP pseudo elements */
		TidyFixBackslash,    /*< Fix URLs by replacing \ with / */
		TidyIndentAttributes,/*< Newline+indent before each attribute */
		TidyXmlPIs,          /*< If set to yes PIs must end with ?> */
		TidyXmlSpace,        /*< If set to yes adds xml:space attr as needed */
		TidyEncloseBodyText, /*< If yes text at body is wrapped in P's */
		TidyEncloseBlockText,/*< If yes text in blocks is wrapped in P's */
		TidyKeepFileTimes,   /*< If yes last modified time is preserved */
		TidyWord2000,        /*< Draconian cleaning for Word2000 */
		TidyMark,            /*< Add meta element indicating tidied doc */
		TidyEmacs,           /*< If true format error output for GNU Emacs */
		TidyEmacsFile,       /*< Name of current Emacs file */
		TidyLiteralAttribs,  /*< If true attributes may use newlines */
		TidyBodyOnly,        /*< Output BODY content only */
		TidyFixUri,          /*< Applies URI encoding if necessary */
		TidyLowerLiterals,   /*< Folds known attribute values to lower case */
		TidyHideComments,    /*< Hides all (real) comments in output */
		TidyIndentCdata,     /*< Indent <![CDATA[ ... ]]> section */
		TidyForceOutput,     /*< Output document even if errors were found */
		TidyShowErrors,      /*< Number of errors to put out */
		TidyAsciiChars,      /*< Convert quotes and dashes to nearest ASCII char */
		TidyJoinClasses,     /*< Join multiple class attributes */
		TidyJoinStyles,      /*< Join multiple style attributes */
		TidyEscapeCdata,     /*< Replace <![CDATA[]]> sections with escaped text */
#if SUPPORT_ASIAN_ENCODINGS
		TidyLanguage,        /*< Language property: not used for anything yet */
		TidyNCR,             /*< Allow numeric character references */
#else
		TidyLanguageNotUsed,
		TidyNCRNotUsed,
#endif
#if SUPPORT_UTF16_ENCODINGS
		TidyOutputBOM,       /**< Output a Byte Order Mark (BOM) for UTF-16 encodings */
		                     /**< auto: if input stream has BOM, we output a BOM */
#else
		TidyOutputBOMNotUsed,
#endif
		TidyReplaceColor,    /*< Replace hex color attribute values with names */
		TidyCSSPrefix,       /*< CSS class naming for -clean option */
		TidyInlineTags,      /*< Declared inline tags */
		TidyBlockTags,       /*< Declared block tags */
		TidyEmptyTags,       /*< Declared empty tags */
		TidyPreTags,         /*< Declared pre tags */
		TidyAccessibilityCheckLevel, /*< Accessibility check level
		                                 0 (old style), or 1, 2, 3 */
		TidyVertSpace,       /*< degree to which markup is spread out vertically */
#if SUPPORT_ASIAN_ENCODINGS
		TidyPunctWrap,       /*< consider punctuation and breaking spaces for wrapping */
#else
		TidyPunctWrapNotUsed,
#endif
		TidyMergeDivs,       /*< Merge multiple DIVs */
		TidyDecorateInferredUL, /*< Mark inferred UL elements with no indent CSS */
		TidyPreserveEntities, /*< Preserve entities */
		TidySortAttributes,  /*< Sort attributes */
		TidyMergeSpans,      /*< Merge multiple SPANs */
		TidyAnchorAsName,    /*< Define anchors as name attributes */
		N_TIDY_OPTIONS       /*< Must be last */
	}
}
 144 | -------------------------------------------------------------------------------- /Document.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009 Mark Beaton 2 | // 3 | // Permission is hereby granted, free of charge, to any person 4 | // 
obtaining a copy of this software and associated documentation 5 | // files (the "Software"), to deal in the Software without 6 | // restriction, including without limitation the rights to use, 7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the 9 | // Software is furnished to do so, subject to the following 10 | // conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be 13 | // included in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | // OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | using System; 25 | using System.Collections.Generic; 26 | using System.Diagnostics; 27 | using System.Globalization; 28 | using System.IO; 29 | using System.Runtime.InteropServices; 30 | using System.Text; 31 | using TidyManaged.Interop; 32 | 33 | namespace TidyManaged 34 | { 35 | /// 36 | /// Represents an HTML document (or XML, XHTML) to be processed by Tidy. 
37 | /// 38 | public class Document : IDisposable 39 | { 40 | #region Constructors 41 | 42 | Document() 43 | { 44 | this.handle = PInvoke.tidyCreate(); 45 | this.disposed = false; 46 | } 47 | 48 | Document(string htmlString) 49 | : this() 50 | { 51 | this.htmlString = htmlString; 52 | this.fromString = true; 53 | } 54 | 55 | 56 | Document(Stream stream) 57 | : this() 58 | { 59 | this.stream = stream; 60 | } 61 | 62 | #endregion 63 | 64 | #region Fields 65 | 66 | IntPtr handle; 67 | Stream stream; 68 | string htmlString; 69 | bool fromString; 70 | bool disposed; 71 | bool cleaned; 72 | 73 | #endregion 74 | 75 | #region Properties 76 | 77 | DateTime? _ReleaseDate; 78 | static readonly object releaseDateLock = new object(); 79 | /// 80 | /// Gets the release date of the underlying Tidy library. 81 | /// 82 | public DateTime ReleaseDate 83 | { 84 | get 85 | { 86 | lock (releaseDateLock) 87 | { 88 | if (!_ReleaseDate.HasValue) 89 | { 90 | DateTime val = DateTime.MinValue; 91 | string release = Marshal.PtrToStringAnsi(PInvoke.tidyReleaseDate()); 92 | if (release != null) 93 | { 94 | string[] tokens = release.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries); 95 | if (tokens.Length >= 3) 96 | { 97 | DateTime.TryParseExact(tokens[0] + " " + tokens[1] + " " + tokens[2], "d MMMM yyyy", CultureInfo.InvariantCulture, DateTimeStyles.None, out val); 98 | } 99 | } 100 | _ReleaseDate = val; 101 | } 102 | return _ReleaseDate.Value; 103 | } 104 | } 105 | } 106 | 107 | #region HTML, XHTML, XML Options 108 | 109 | /// 110 | /// [add-xml-decl] Gets or sets whether Tidy should add the XML declaration when outputting XML or XHTML. Note that if the input already includes an <?xml ... ?> declaration then this option will be ignored. If the encoding for the output is different from "ascii", one of the utf encodings or "raw", the declaration is always added as required by the XML standard. Defaults to false. 
111 | /// 112 | public bool AddXmlDeclaration 113 | { 114 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyXmlDecl); } 115 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyXmlDecl, value); } 116 | } 117 | 118 | /// 119 | /// [add-xml-space] Gets or sets whether Tidy should add xml:space="preserve" to elements such as <PRE>, <STYLE> and <SCRIPT> when generating XML. This is needed if the whitespace in such elements is to be parsed appropriately without having access to the DTD. Defaults to false. 120 | /// 121 | public bool AddXmlSpacePreserve 122 | { 123 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyXmlSpace); } 124 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyXmlSpace, value); } 125 | } 126 | 127 | /// 128 | /// [alt-text] Gets or sets the default "alt=" text Tidy uses for <IMG> attributes. This feature is dangerous as it suppresses further accessibility warnings. You are responsible for making your documents accessible to people who can not see the images! 129 | /// 130 | public string DefaultAltText 131 | { 132 | get { return PInvoke.tidyOptGetValueString(this.handle, TidyOptionId.TidyAltText); } 133 | set { PInvoke.tidyOptSetValue(this.handle, TidyOptionId.TidyAltText, value); } 134 | } 135 | 136 | /// 137 | /// [anchor-as-name] Gets or sets the deletion or addition of the name attribute in elements where it can serve as anchor. If set to true, a name attribute, if not already existing, is added along an existing id attribute if the DTD allows it. If set to false, any existing name attribute is removed if an id attribute exists or has been added. Defaults to true. 
138 | /// 139 | public bool AnchorAsName 140 | { 141 | // Not available before until 18 Jun 2008 142 | get 143 | { 144 | if (this.ReleaseDate < new DateTime(2008, 6, 18)) 145 | { 146 | Trace.WriteLine("AnchorAsName is not supported by your version of tidylib - ignoring."); 147 | return true; 148 | } 149 | return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyAnchorAsName); 150 | } 151 | set 152 | { 153 | if (this.ReleaseDate < new DateTime(2008, 6, 18)) 154 | Trace.WriteLine("AnchorAsName is not supported by your version of tidylib - ignoring."); 155 | else 156 | PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyAnchorAsName, value); 157 | } 158 | } 159 | 160 | /// 161 | /// [assume-xml-procins] Gets or sets whether Tidy should change the parsing of processing instructions to require ?> as the terminator rather than >. This option is automatically set if the input is in XML. Defaults to false. 162 | /// 163 | public bool ChangeXmlProcessingInstructions 164 | { 165 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyXmlPIs); } 166 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyXmlPIs, value); } 167 | } 168 | 169 | /// 170 | /// [bare] Gets or sets whether Tidy should strip Microsoft specific HTML from Word 2000 documents, and output spaces rather than non-breaking spaces where they exist in the input. Defaults to false. 171 | /// 172 | public bool MakeBare 173 | { 174 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyMakeBare); } 175 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyMakeBare, value); } 176 | } 177 | 178 | /// 179 | /// [clean] Gets or sets whether Tidy should strip out surplus presentational tags and attributes replacing them by style rules and structural markup as appropriate. It works well on the HTML saved by Microsoft Office products. Defaults to false. 
180 | /// 181 | public bool MakeClean 182 | { 183 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyMakeClean); } 184 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyMakeClean, value); } 185 | } 186 | 187 | /// 188 | /// [css-prefix] Gets or sets the prefix that Tidy uses for styles rules. By default, "c" will be used. 189 | /// 190 | public string CssPrefix 191 | { 192 | get { return PInvoke.tidyOptGetValueString(this.handle, TidyOptionId.TidyCSSPrefix); } 193 | set { PInvoke.tidyOptSetValue(this.handle, TidyOptionId.TidyCSSPrefix, value); } 194 | } 195 | 196 | /// 197 | /// [decorate-inferred-ul] Gets or sets whether Tidy should decorate inferred UL elements with some CSS markup to avoid indentation to the right. Defaults to false. 198 | /// 199 | public bool DecorateInferredUL 200 | { 201 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyDecorateInferredUL); } 202 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyDecorateInferredUL, value); } 203 | } 204 | 205 | /// 206 | /// [doctype] Gets or sets the DOCTYPE declaration generated by Tidy. If set to "Omit" the output won't contain a DOCTYPE declaration. If set to "Auto" (the default) Tidy will use an educated guess based upon the contents of the document. If set to "Strict", Tidy will set the DOCTYPE to the strict DTD. If set to "Loose", the DOCTYPE is set to the loose (transitional) DTD. Alternatively, you can supply a string for the formal public identifier (FPI). 207 | /// 208 | /// For example: 209 | /// doctype: "-//ACME//DTD HTML 3.14159//EN" 210 | /// 211 | /// If you specify the FPI for an XHTML document, Tidy will set the system identifier to an empty string. For an HTML document, Tidy adds a system identifier only if one was already present in order to preserve the processing mode of some browsers. Tidy leaves the DOCTYPE for generic XML documents unchanged. "Omit" implies OutputNumericEntities = true. 
This option does not offer a validation of the document conformance. 212 | /// 213 | public DocTypeMode DocType 214 | { 215 | get { return (DocTypeMode) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyDoctypeMode); } 216 | set { PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyDoctypeMode, (uint) value); } 217 | } 218 | 219 | /// 220 | /// [drop-empty-paras] Gets or sets whether Tidy should discard empty paragraphs. Defaults to true. 221 | /// 222 | public bool DropEmptyParagraphs 223 | { 224 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyDropEmptyParas); } 225 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyDropEmptyParas, value); } 226 | } 227 | 228 | /// 229 | /// [drop-font-tags] Gets or sets whether Tidy should discard <FONT> and <CENTER> tags without creating the corresponding style rules. This option can be set independently of the MakeClean option. Defaults to false. 230 | /// 231 | public bool DropFontTags 232 | { 233 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyDropFontTags); } 234 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyDropFontTags, value); } 235 | } 236 | 237 | /// 238 | /// [drop-proprietary-attributes] Gets or sets whether Tidy should strip out proprietary attributes, such as MS data binding attributes. Defaults to false. 239 | /// 240 | public bool DropProprietaryAttributes 241 | { 242 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyDropPropAttrs); } 243 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyDropPropAttrs, value); } 244 | } 245 | 246 | /// 247 | /// [enclose-block-text] Gets or sets whether Tidy should insert a <P> element to enclose any text it finds in any element that allows mixed content for HTML transitional but not HTML strict. Defaults to false. 
248 | /// 249 | public bool EncloseBlockText 250 | { 251 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyEncloseBlockText); } 252 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyEncloseBlockText, value); } 253 | } 254 | 255 | /// 256 | /// [enclose-text] Gets or sets whether Tidy should enclose any text it finds in the body element within a <P> element. This is useful when you want to take existing HTML and use it with a style sheet. Defaults to false. 257 | /// 258 | public bool EncloseBodyText 259 | { 260 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyEncloseBodyText); } 261 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyEncloseBodyText, value); } 262 | } 263 | 264 | /// 265 | /// [escape-cdata] Gets or sets whether Tidy should convert <![CDATA[]]> sections to normal text. Defaults to false. 266 | /// 267 | public bool EscapeCdata 268 | { 269 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyEscapeCdata); } 270 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyEscapeCdata, value); } 271 | } 272 | 273 | /// 274 | /// [fix-backslash] Gets or sets whether Tidy should replace backslash characters "\" in URLs with forward slashes "/". Defaults to true. 275 | /// 276 | public bool FixUrlBackslashes 277 | { 278 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyFixBackslash); } 279 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyFixBackslash, value); } 280 | } 281 | 282 | /// 283 | /// [fix-bad-comments] Gets or sets whether Tidy should replace unexpected hyphens with "=" characters when it comes across adjacent hyphens. This option is provided for users of Cold Fusion which uses the comment syntax: <!--- --->. Defaults to true. 
284 | /// 285 | public bool FixBadComments 286 | { 287 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyFixComments); } 288 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyFixComments, value); } 289 | } 290 | 291 | /// 292 | /// [fix-uri] Gets or sets whether Tidy should check attribute values that carry URIs for illegal characters and if such are found, escape them as HTML 4 recommends. Defaults to true. 293 | /// 294 | public bool FixAttributeUris 295 | { 296 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyFixUri); } 297 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyFixUri, value); } 298 | } 299 | 300 | /// 301 | /// [hide-comments] Gets or sets whether Tidy should print out comments. Defaults to false. 302 | /// 303 | public bool RemoveComments 304 | { 305 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyHideComments); } 306 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyHideComments, value); } 307 | } 308 | 309 | /// 310 | /// [hide-endtags] Gets or sets whether Tidy should omit optional end-tags when generating the pretty printed markup. This option is ignored if you are outputting to XML. Defaults to false. 311 | /// 312 | public bool RemoveEndTags 313 | { 314 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyHideEndTags); } 315 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyHideEndTags, value); } 316 | } 317 | 318 | /// 319 | /// [indent-cdata] Gets or sets whether Tidy should indent <![CDATA[]]> sections. Defaults to false. 320 | /// 321 | public bool IndentCdata 322 | { 323 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyIndentCdata); } 324 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyIndentCdata, value); } 325 | } 326 | 327 | /// 328 | /// [input-xml] Gets or sets whether Tidy use the XML parser rather than the error correcting HTML parser. Defaults to false. 
329 | /// 330 | public bool UseXmlParser 331 | { 332 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyXmlTags); } 333 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyXmlTags, value); } 334 | } 335 | 336 | /// 337 | /// [join-classes] Gets or sets whether Tidy should combine class names to generate a single new class name, if multiple class assignments are detected on an element. Defaults to false. 338 | /// 339 | public bool JoinClasses 340 | { 341 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyJoinClasses); } 342 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyJoinClasses, value); } 343 | } 344 | 345 | /// 346 | /// [join-styles] Gets or sets whether Tidy should combine styles to generate a single new style, if multiple style values are detected on an element. Defaults to true. 347 | /// 348 | public bool JoinStyles 349 | { 350 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyJoinStyles); } 351 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyJoinStyles, value); } 352 | } 353 | 354 | /// 355 | /// [literal-attributes] Gets or sets whether Tidy should ensure that whitespace characters within attribute values are passed through unchanged. Defaults to false. 356 | /// 357 | public bool EnsureLiteralAttributes 358 | { 359 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyLiteralAttribs); } 360 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyLiteralAttribs, value); } 361 | } 362 | 363 | /// 364 | /// [logical-emphasis] Gets or sets whether Tidy should replace any occurrence of <I> by <EM> and any occurrence of <B> by <STRONG>. In both cases, the attributes are preserved unchanged. This option can be set independently of the "MakeClean" and "DropFontTags" properties. Defaults to false. 
/// </summary>
public bool UseLogicalEmphasis
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyLogicalEmphasis); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyLogicalEmphasis, value); }
}

/// <summary>
/// [lower-literals] Gets or sets whether Tidy should convert the value of an attribute that takes a list of predefined values to lower case. This is required for XHTML documents. Defaults to false.
/// </summary>
public bool LowerCaseLiterals
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyLowerLiterals); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyLowerLiterals, value); }
}

/// <summary>
/// [merge-divs] Gets or sets whether Tidy should merge nested &lt;div&gt; such as "&lt;div&gt;&lt;div&gt;...&lt;/div&gt;&lt;/div&gt;". If set to "Auto", the attributes of the inner &lt;div&gt; are moved to the outer one. As well, nested &lt;div&gt; with ID attributes are not merged. If set to "Yes", the attributes of the inner &lt;div&gt; are discarded with the exception of "class" and "style". Can be used to modify behavior of the "MakeClean" option. Defaults to Auto.
/// </summary>
public AutoBool MergeDivs
{
    // The option is stored by Tidy as an integer; the enum is cast to and from that raw value.
    get { return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyMergeDivs); }
    set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyMergeDivs, (uint) value); }
}

/// <summary>
/// [merge-spans] Gets or sets whether Tidy should merge nested &lt;span&gt; such as "&lt;span&gt;&lt;span&gt;...&lt;/span&gt;&lt;/span&gt;". The algorithm is identical to the one used by MergeDivs. Can be used to modify behavior of the "MakeClean" option. Defaults to "Auto".
/// </summary>
public AutoBool MergeSpans
{
    // Not available in tidylib releases dated before 13 Aug 2007: on such releases
    // the getter reports AutoBool.No and the setter does nothing, each after writing a trace message.
    get
    {
        if (ReleaseDate < new DateTime(2007, 8, 13))
        {
            Trace.WriteLine("MergeSpans is not supported by your version of tidylib - ignoring.");
            return AutoBool.No;
        }

        return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyMergeSpans);
    }
    set
    {
        if (ReleaseDate < new DateTime(2007, 8, 13))
        {
            Trace.WriteLine("MergeSpans is not supported by your version of tidylib - ignoring.");
        }
        else
        {
            PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyMergeSpans, (uint) value);
        }
    }
}

#if SUPPORT_ASIAN_ENCODINGS
/// <summary>
/// [ncr] Gets or sets whether Tidy should allow numeric character references. Defaults to true.
/// </summary>
public bool AllowNumericCharacterReferences
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyNCR); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyNCR, value); }
}
#endif

/// <summary>
/// [new-blocklevel-tags] Gets or sets new block-level tags. This option takes a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. Note you can't change the content model for elements such as &lt;TABLE&gt;, &lt;UL&gt;, &lt;OL&gt; and &lt;DL&gt;. This option is ignored in XML mode.
/// </summary>
public string NewBlockLevelTags
{
    get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyBlockTags); }
    set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyBlockTags, value); }
}

/// <summary>
/// [new-empty-tags] Gets or sets new empty inline tags. This option takes a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags.
/// This option is ignored in XML mode.
/// </summary>
public string NewEmptyInlineTags
{
    get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyEmptyTags); }
    set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyEmptyTags, value); }
}

/// <summary>
/// [new-inline-tags] Gets or sets new non-empty inline tags. This option takes a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. This option is ignored in XML mode.
/// </summary>
public string NewInlineTags
{
    get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyInlineTags); }
    set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyInlineTags, value); }
}

/// <summary>
/// [new-pre-tags] Gets or sets new tags that are to be processed in exactly the same way as HTML's &lt;PRE&gt; element. This option takes a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. Note you cannot as yet add new CDATA elements (similar to &lt;SCRIPT&gt;). This option is ignored in XML mode.
/// </summary>
public string NewPreTags
{
    get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyPreTags); }
    set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyPreTags, value); }
}

/// <summary>
/// [numeric-entities] Gets or sets whether Tidy should output entities other than the built-in HTML entities (&amp;amp;, &amp;lt;, &amp;gt; and &amp;quot;) in the numeric rather than the named entity form. Only entities compatible with the DOCTYPE declaration generated are used. Entities that can be represented in the output encoding are translated correspondingly. Defaults to false.
/// </summary>
public bool OutputNumericEntities
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyNumEntities); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyNumEntities, value); }
}

/// <summary>
/// [output-html] Gets or sets whether Tidy should generate pretty printed output, writing it as HTML. Defaults to false.
/// </summary>
public bool OutputHtml
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyHtmlOut); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyHtmlOut, value); }
}

/// <summary>
/// [output-xhtml] Gets or sets whether Tidy should generate pretty printed output, writing it as extensible HTML. This option causes Tidy to set the DOCTYPE and default namespace as appropriate to XHTML. If a DOCTYPE or namespace is given they will be checked for consistency with the content of the document. In the case of an inconsistency, the corrected values will appear in the output. For XHTML, entities can be written as named or numeric entities according to the setting of the "OutputNumericEntities" value. The original case of tags and attributes will be preserved, regardless of other options. Defaults to false.
/// </summary>
public bool OutputXhtml
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyXhtmlOut); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyXhtmlOut, value); }
}

/// <summary>
/// [output-xml] Gets or sets whether Tidy should generate pretty printed output, writing it as well-formed XML. Any entities not defined in XML 1.0 will be written as numeric entities to allow them to be parsed by an XML parser. The original case of tags and attributes will be preserved, regardless of other options. Defaults to false.
/// </summary>
public bool OutputXml
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyXmlOut); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyXmlOut, value); }
}

/// <summary>
/// [preserve-entities] Gets or sets whether Tidy should preserve the well-formed entities as found in the input. Defaults to false.
/// </summary>
public bool PreserveEntities
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyPreserveEntities); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyPreserveEntities, value); }
}

/// <summary>
/// [quote-ampersand] Gets or sets whether Tidy should output unadorned &amp; characters as &amp;amp;. Defaults to true.
/// </summary>
public bool QuoteAmpersands
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyQuoteAmpersand); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyQuoteAmpersand, value); }
}

/// <summary>
/// [quote-marks] Gets or sets whether Tidy should output " characters as &amp;quot; as is preferred by some editing environments. The apostrophe character ' is written out as &amp;#39; since many web browsers don't yet support &amp;apos;. Defaults to false.
/// </summary>
public bool QuoteMarks
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyQuoteMarks); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyQuoteMarks, value); }
}

/// <summary>
/// [quote-nbsp] Gets or sets whether Tidy should output non-breaking space characters as entities, rather than as the Unicode character value 160 (decimal). Defaults to true.
/// </summary>
public bool QuoteNonBreakingSpaces
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyQuoteNbsp); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyQuoteNbsp, value); }
}

/// <summary>
/// [repeated-attributes] Gets or sets whether Tidy should keep the first or last attribute, if an attribute is repeated, e.g. has two align attributes. Defaults to "KeepLast".
/// </summary>
public RepeatedAttributeMode RepeatedAttributeMode
{
    get { return (RepeatedAttributeMode) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyDuplicateAttrs); }
    set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyDuplicateAttrs, (uint) value); }
}

/// <summary>
/// [replace-color] Gets or sets whether Tidy should replace numeric values in color attributes by HTML/XHTML color names where defined, e.g. replace "#ffffff" with "white". Defaults to false.
/// </summary>
public bool UseColorNames
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyReplaceColor); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyReplaceColor, value); }
}

/// <summary>
/// [show-body-only] Gets or sets whether Tidy should print only the contents of the body tag as an HTML fragment. If set to "Auto", this is performed only if the body tag has been inferred. Useful for incorporating existing whole pages as a portion of another page. This option has no effect if XML output is requested. Defaults to "No".
/// </summary>
public AutoBool OutputBodyOnly
{
    // This option was changed from a Bool to an AutoBool on 24 May 2007, so releases
    // dated earlier are accessed through the boolean API and mapped onto Yes/No.
    get
    {
        if (ReleaseDate < new DateTime(2007, 5, 24))
        {
            bool bodyOnly = PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyBodyOnly);
            return bodyOnly ? AutoBool.Yes : AutoBool.No;
        }

        return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyBodyOnly);
    }
    set
    {
        if (ReleaseDate < new DateTime(2007, 5, 24))
        {
            // Only an explicit Yes maps to true; No and Auto both become false.
            bool bodyOnly = value == AutoBool.Yes;
            PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyBodyOnly, bodyOnly);
        }
        else
        {
            PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyBodyOnly, (uint) value);
        }
    }
}

/// <summary>
/// [uppercase-attributes] Gets or sets whether Tidy should output attribute names in upper case. The default is false, which results in lower case attribute names, except for XML input, where the original case is preserved.
/// </summary>
public bool UpperCaseAttributes
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyUpperCaseAttrs); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyUpperCaseAttrs, value); }
}

/// <summary>
/// [uppercase-tags] Gets or sets whether Tidy should output tag names in upper case. The default is false, which results in lower case tag names, except for XML input, where the original case is preserved.
/// </summary>
public bool UpperCaseTags
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyUpperCaseTags); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyUpperCaseTags, value); }
}

/// <summary>
/// [word-2000] Gets or sets whether Tidy should go to great pains to strip out all the surplus stuff Microsoft Word 2000 inserts when you save Word documents as "Web pages". Doesn't handle embedded images or VML. You should consider using Word's "Save As: Web Page, Filtered". Defaults to false.
/// </summary>
public bool CleanWord2000
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWord2000); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWord2000, value); }
}

#endregion

#region Diagnostics Options

/// <summary>
/// [accessibility-check] Gets or sets the level of accessibility checking, if any, that Tidy should do. Defaults to TidyClassic.
/// </summary>
public AccessibilityCheckLevel AccessibilityCheckLevel
{
    get { return (AccessibilityCheckLevel) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyAccessibilityCheckLevel); }
    set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyAccessibilityCheckLevel, (uint) value); }
}

/// <summary>
/// [show-errors] Gets or sets the number Tidy uses to determine if further errors should be shown. If set to 0, then no errors are shown. Negative values are stored as 0. Defaults to 6.
/// </summary>
public int MaximumErrors
{
    get { return (int) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyShowErrors); }
    set
    {
        // Clamp before the unsigned cast so a negative input cannot wrap around.
        uint limit = value < 0 ? 0u : (uint) value;
        PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyShowErrors, limit);
    }
}

/// <summary>
/// [show-warnings] Gets or sets whether Tidy should report warnings. Turning this off can be useful when a few errors are hidden in a flurry of warnings. Defaults to true.
/// </summary>
public bool ShowWarnings
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyShowWarnings); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyShowWarnings, value); }
}

#endregion

#region Pretty Print Options

/// <summary>
/// [break-before-br] Gets or sets whether Tidy should output a line break before each &lt;BR&gt; element. Defaults to false.
/// </summary>
public bool LineBreakBeforeBR
{
    get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyBreakBeforeBR); }
    set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyBreakBeforeBR, value); }
}

/// <summary>
/// [indent] Gets or sets whether Tidy should indent block-level tags. If set to Auto, this option causes Tidy to decide whether or not to indent the content of tags such as TITLE, H1-H6, LI, TD, or P depending on whether or not the content includes a block-level element. You are advised to avoid setting indent to Yes as this can expose layout bugs in some browsers. Defaults to No.
/// </summary>
public AutoBool IndentBlockElements
{
    get { return (AutoBool) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyIndentContent); }
    set { PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyIndentContent, (uint) value); }
}

/// <summary>
/// [indent-attributes] Gets or sets whether Tidy should begin each attribute on a new line. Defaults to false.
/// </summary>
public bool IndentAttributes
{
    get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyIndentAttributes); }
    set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyIndentAttributes, value); }
}

/// <summary>
/// [indent-spaces] Gets or sets the number of spaces Tidy uses to indent content, when indentation is enabled. Negative values are stored as 0. Defaults to 2.
/// </summary>
public int IndentSpaces
{
    get { return (int) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyIndentSpaces); }
    set
    {
        // A negative int cast straight to uint wraps to a huge value; clamp first,
        // matching what the MaximumErrors setter does.
        if (value < 0) value = 0;
        PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyIndentSpaces, (uint) value);
    }
}

/// <summary>
/// [markup] Gets or sets whether Tidy should generate a pretty printed version of the markup. Note that Tidy won't generate a pretty printed version if it finds significant errors (see ForceOutput). Defaults to true.
/// </summary>
public bool Markup
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyShowMarkup); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyShowMarkup, value); }
}

#if SUPPORT_ASIAN_ENCODINGS
/// <summary>
/// [punctuation-wrap] Gets or sets whether Tidy should line wrap after some Unicode or Chinese punctuation characters. Defaults to false.
/// </summary>
public bool PunctuationWrap
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyPunctWrap); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyPunctWrap, value); }
}
#endif

/// <summary>
/// [sort-attributes] Gets or sets how Tidy should sort attributes within an element using the specified sort algorithm. If set to Alpha, the algorithm is an ascending alphabetic sort. Defaults to None.
/// </summary>
public SortStrategy AttributeSortType
{
    // tidylib releases dated before 12 Jun 2007 are treated as lacking this option:
    // the getter reports SortStrategy.None and the setter does nothing, each after a trace message.
    // NOTE(review): the previous comment cited 6 Jun 2007 while the check uses 12 Jun 2007 - confirm which date is right.
    get
    {
        if (ReleaseDate < new DateTime(2007, 6, 12))
        {
            Trace.WriteLine("AttributeSortType is not supported by your version of tidylib - ignoring.");
            return SortStrategy.None;
        }

        return (SortStrategy) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidySortAttributes);
    }
    set
    {
        if (ReleaseDate < new DateTime(2007, 6, 12))
        {
            Trace.WriteLine("AttributeSortType is not supported by your version of tidylib - ignoring.");
        }
        else
        {
            PInvoke.tidyOptSetInt(handle, TidyOptionId.TidySortAttributes, (uint) value);
        }
    }
}

/// <summary>
/// [tab-size] Gets or sets the number of columns that Tidy uses between successive tab stops. It is used to map tabs to spaces when reading the input. Tidy never outputs tabs. Defaults to 8.
/// </summary>
public int TabSize
{
    get { return (int) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyTabSize); }
    set
    {
        // A negative int cast straight to uint wraps to a huge value; clamp first,
        // matching what the MaximumErrors setter does. Negative values are stored as 0.
        if (value < 0) value = 0;
        PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyTabSize, (uint) value);
    }
}

/// <summary>
/// [vertical-space] Gets or sets whether Tidy should add some empty lines for readability. Defaults to false.
/// </summary>
public bool AddVerticalSpace
{
    get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyVertSpace); }
    set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyVertSpace, value); }
}

/// <summary>
/// [wrap] Gets or sets the right margin Tidy uses for line wrapping. Tidy tries to wrap lines so that they do not exceed this length. Set wrap to zero if you want to disable line wrapping. Negative values are stored as 0. Defaults to 68.
/// </summary>
public int WrapAt
{
    get { return (int) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyWrapLen); }
    set
    {
        // Clamp before the unsigned cast so a negative margin cannot wrap around to a huge one.
        if (value < 0) value = 0;
        PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyWrapLen, (uint) value);
    }
}

/// <summary>
/// [wrap-asp] Gets or sets whether Tidy should line wrap text contained within ASP pseudo elements, which look like: &lt;% ... %&gt;. Defaults to true.
/// </summary>
public bool WrapAsp
{
    get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyWrapAsp); }
    set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyWrapAsp, value); }
}

/// <summary>
/// [wrap-attributes] Gets or sets whether Tidy should line wrap attribute values, for easier editing. This option can be set independently of WrapScriptLiterals. Defaults to false.
/// </summary>
public bool WrapAttributeValues
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapAttVals); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapAttVals, value); }
}

/// <summary>
/// [wrap-jste] Gets or sets whether Tidy should line wrap text contained within JSTE pseudo elements, which look like: &lt;# ... #&gt;. Defaults to true.
/// </summary>
public bool WrapJste
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapJste); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapJste, value); }
}

/// <summary>
/// [wrap-php] Gets or sets whether Tidy should line wrap text contained within PHP pseudo elements, which look like: &lt;?php ... ?&gt;. Defaults to true.
/// </summary>
public bool WrapPhp
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapPhp); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapPhp, value); }
}

/// <summary>
/// [wrap-script-literals] Gets or sets whether Tidy should line wrap string literals that appear in script attributes. Tidy wraps long script string literals by inserting a backslash character before the line break. Defaults to false.
/// </summary>
public bool WrapScriptLiterals
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapScriptlets); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapScriptlets, value); }
}

/// <summary>
/// [wrap-sections] Gets or sets whether Tidy should line wrap text contained within &lt;![ ... ]&gt; section tags. Defaults to true.
/// </summary>
public bool WrapSections
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapSection); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapSection, value); }
}

#endregion

#region Character Encoding Options

/// <summary>
/// [ascii-chars] Gets or sets whether &amp;emdash;, &amp;rdquo;, and other named character entities are downgraded to their closest ASCII equivalents when the "MakeClean" option is set to true. Defaults to false.
/// </summary>
public bool AsciiEntities
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyAsciiChars); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyAsciiChars, value); }
}

/// <summary>
/// [char-encoding] Gets or sets the character encoding Tidy uses for both the input and output. For ascii, Tidy will accept Latin-1 (ISO-8859-1) character values, but will use entities for all characters whose value &gt; 127. For raw, Tidy will output values above 127 without translating them into entities. For latin1, characters above 255 will be written as entities. For utf8, Tidy assumes that both input and output is encoded as UTF-8. You can use iso2022 for files encoded using the ISO-2022 family of encodings e.g. ISO-2022-JP. For mac and win1252, Tidy will accept vendor specific character values, but will use entities for all characters whose value &gt; 127. For unsupported encodings, use an external utility to convert to and from UTF-8. Defaults to "Ascii".
/// </summary>
public EncodingType CharacterEncoding
{
    get { return (EncodingType) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyCharEncoding); }
    set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyCharEncoding, (uint) value); }
}

/// <summary>
/// [input-encoding] Gets or sets the character encoding Tidy uses for the input. See CharacterEncoding for more info. Defaults to "Latin1".
/// </summary>
public EncodingType InputCharacterEncoding
{
    get { return (EncodingType) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyInCharEncoding); }
    set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyInCharEncoding, (uint) value); }
}

/// <summary>
/// [newline] Gets or sets the type of newline. The default is appropriate to the current platform: CRLF on PC-DOS, MS-Windows and OS/2, CR on Classic Mac OS, and LF everywhere else (Unix and Linux).
/// </summary>
public NewlineType NewLine
{
    get { return (NewlineType) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyNewline); }
    set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyNewline, (uint) value); }
}

#if SUPPORT_UTF16_ENCODINGS
/// <summary>
/// [output-bom] Gets or sets whether Tidy should write a Unicode Byte Order Mark character (BOM; also known as Zero Width No-Break Space; has value of U+FEFF) to the beginning of the output; only for UTF-8 and UTF-16 output encodings. If set to "Auto", this option causes Tidy to write a BOM to the output only if a BOM was present at the beginning of the input. A BOM is always written for XML/XHTML output using UTF-16 output encodings. Defaults to "Auto".
/// </summary>
public AutoBool OutputByteOrderMark
{
    get { return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyOutputBOM); }
    set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyOutputBOM, (uint) value); }
}
#endif

/// <summary>
/// [output-encoding] Gets or sets the character encoding Tidy uses for the output. See CharacterEncoding for more info. May only be different from input-encoding for Latin encodings (ascii, latin0, latin1, mac, win1252, ibm858). Defaults to "Ascii".
/// </summary>
public EncodingType OutputCharacterEncoding
{
    get { return (EncodingType) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyOutCharEncoding); }
    set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyOutCharEncoding, (uint) value); }
}

#endregion

#region Miscellaneous Options

/// <summary>
/// [error-file] Gets or sets the error file Tidy uses for errors and warnings. Normally errors and warnings are output to "stderr". Defaults to null.
/// </summary>
public string ErrorFile
{
    get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyErrFile); }
    set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyErrFile, value); }
}

/// <summary>
/// [force-output] Gets or sets whether Tidy should produce output even if errors are encountered. Use this option with care - if Tidy reports an error, this means Tidy was not able to, or is not sure how to, fix the error, so the resulting output may not reflect your intention. Defaults to false.
/// </summary>
public bool ForceOutput
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyForceOutput); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyForceOutput, value); }
}

/// <summary>
/// [gnu-emacs] Gets or sets whether Tidy should change the format for reporting errors and warnings to a format that is more easily parsed by GNU Emacs. Defaults to false.
/// </summary>
public bool UseGnuEmacsErrorFormat
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyEmacs); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyEmacs, value); }
}

/// <summary>
/// [keep-time] Gets or sets whether Tidy should keep the original modification time of files that Tidy modifies in place. The default is no.
/// Setting the option to yes allows you to tidy files without causing these files to be uploaded to a web server when using a tool such as SiteCopy. Note this feature is not supported on some platforms. Defaults to false.
/// </summary>
public bool KeepModificationTimestamp
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyKeepFileTimes); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyKeepFileTimes, value); }
}

/// <summary>
/// [output-file] Gets or sets the output file Tidy uses for markup. Normally markup is written to "stdout". Defaults to null.
/// </summary>
public string OutputFile
{
    get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyOutFile); }
    set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyOutFile, value); }
}

/// <summary>
/// [quiet] Gets or sets whether Tidy should suppress the summary of the numbers of errors and warnings, and the welcome or informational messages. Defaults to false.
/// </summary>
public bool Quiet
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyQuiet); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyQuiet, value); }
}

/// <summary>
/// [tidy-mark] Gets or sets whether Tidy should add a meta element to the document head to indicate that the document has been tidied. Tidy won't add a meta element if one is already present. Defaults to true.
/// </summary>
public bool AddTidyMetaElement
{
    get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyMark); }
    set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyMark, value); }
}

/// <summary>
/// [write-back] Gets or sets whether Tidy should write back the tidied markup to the same file it read from. You are advised to keep copies of important files before tidying them, as on rare occasions the result may not be what you expect. Defaults to false.
/// </summary>
public bool WriteBack
{
    get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyWriteBack); }
    set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyWriteBack, value); }
}

#endregion

#endregion

#region Methods

/// <summary>
/// Parses input markup, and executes configured cleanup and repair operations.
/// </summary>
public void CleanAndRepair()
{
    if (fromString)
    {
        // String input is always parsed as UTF-8; the caller's configured input
        // encoding is put back afterwards, even if parsing throws.
        EncodingType savedEncoding = this.InputCharacterEncoding;
        this.InputCharacterEncoding = EncodingType.Utf8;
        try
        {
            PInvoke.tidyParseString(this.handle, this.htmlString);
        }
        finally
        {
            this.InputCharacterEncoding = savedEncoding;
        }
    }
    else
    {
        InputSource input = new InputSource(this.stream);
        PInvoke.tidyParseSource(this.handle, ref input.TidyInputSource);
    }
    PInvoke.tidyCleanAndRepair(this.handle);
    cleaned = true;
}

/// <summary>
/// Saves the processed markup to a string.
/// </summary>
/// <returns>A string containing the processed markup.</returns>
/// <exception cref="InvalidOperationException">CleanAndRepair() has not been called yet.</exception>
public string Save()
{
    if (!cleaned)
        throw new InvalidOperationException("CleanAndRepair() must be called before Save().");

    // Status returned by tidySaveString when the supplied buffer is too small (-ENOMEM).
    const int BufferTooSmall = -12;

    // Snapshot the OUTPUT encoding: that is the option overridden below and restored at
    // the end. (Snapshotting CharacterEncoding here would overwrite the caller's output
    // encoding with a different option's value on restore.)
    EncodingType savedEncoding = this.OutputCharacterEncoding;
#if SUPPORT_UTF16_ENCODINGS
    // OutputByteOrderMark only exists when UTF-16 support is compiled in.
    AutoBool savedBom = this.OutputByteOrderMark;
    this.OutputByteOrderMark = AutoBool.No;
#endif
    try
    {
        uint bufferLength = 1;
        byte[] htmlBytes;
        while (true)
        {
            // this setting appears to be reset by libtidy after calling tidySaveString; we need to set it each time
            this.OutputCharacterEncoding = EncodingType.Utf8;

            htmlBytes = new byte[bufferLength];
            GCHandle pin = GCHandle.Alloc(htmlBytes, GCHandleType.Pinned);
            try
            {
                var status = PInvoke.tidySaveString(this.handle, pin.AddrOfPinnedObject(), ref bufferLength);

                // Buffer was too small - bufferLength should now be the required length, so try again...
                if (status != BufferTooSmall) break;
            }
            finally
            {
                // Always unpin, otherwise the array stays pinned if the native call throws.
                pin.Free();
            }
        }

        // Decode only the bytes reported as written; the buffer may be longer than the
        // output (e.g. empty output in the initial 1-byte buffer).
        int written = (int) Math.Min(bufferLength, (uint) htmlBytes.Length);
        return Encoding.UTF8.GetString(htmlBytes, 0, written);
    }
    finally
    {
        this.OutputCharacterEncoding = savedEncoding;
#if SUPPORT_UTF16_ENCODINGS
        this.OutputByteOrderMark = savedBom;
#endif
    }
}

/// <summary>
/// Saves the processed markup to a file.
/// </summary>
/// <param name="filePath">The full filesystem path of the file to save the markup to.</param>
/// <exception cref="InvalidOperationException">CleanAndRepair() has not been called yet.</exception>
/// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
public void Save(string filePath)
{
    if (!cleaned)
        throw new InvalidOperationException("CleanAndRepair() must be called before Save().");
    if (filePath == null)
        throw new ArgumentNullException("filePath");

    PInvoke.tidySaveFile(this.handle, filePath);
}

/// <summary>
/// Saves the processed markup to the supplied stream.
/// </summary>
/// <param name="stream">A <see cref="Stream"/> to write the markup to.</param>
/// <exception cref="InvalidOperationException">CleanAndRepair() has not been called yet.</exception>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public void Save(Stream stream)
{
    if (!cleaned)
        throw new InvalidOperationException("CleanAndRepair() must be called before Save().");
    if (stream == null)
        throw new ArgumentNullException("stream");

    // Documents created from a string are written as UTF-8; the configured output
    // encoding is restored afterwards, even if writing throws.
    EncodingType savedEncoding = this.OutputCharacterEncoding;
    if (fromString) this.OutputCharacterEncoding = EncodingType.Utf8;
    try
    {
        OutputSink sink = new OutputSink(stream);
        PInvoke.tidySaveSink(this.handle, ref sink.TidyOutputSink);
    }
    finally
    {
        if (fromString) this.OutputCharacterEncoding = savedEncoding;
    }
}

#endregion

#region Static Methods

/// <summary>
/// Creates a new <see cref="Document"/> instance from a <see cref="String"/> containing HTML.
/// </summary>
/// <param name="htmlString">The HTML string to be processed.</param>
public static Document FromString(string htmlString)
{
    // Reject null up front so the failure names the offending argument.
    if (htmlString == null)
        throw new ArgumentNullException("htmlString");

    return new Document(htmlString);
}

/// <summary>
/// Creates a new <see cref="Document"/> instance from a file.
/// </summary>
/// <param name="filePath">The full filesystem path of the HTML document to be processed.</param>
/// <returns>A new <see cref="Document"/> that reads its markup from the file.</returns>
/// <exception cref="FileNotFoundException">No file exists at <paramref name="filePath"/>.</exception>
public static Document FromFile(string filePath)
{
    if (!File.Exists(filePath))
        throw new FileNotFoundException("File not found.", filePath);

    // Open for reading only. FileStream(path, FileMode.Open) requests read/write
    // access, which fails on read-only files even though the stream is only an input.
    FileStream input = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
    try
    {
        return new Document(input);
    }
    catch
    {
        // The document never took ownership of the stream, so close it here.
        input.Dispose();
        throw;
    }
}

/// <summary>
/// Creates a new <see cref="Document"/> instance from a <see cref="Stream"/> instance.
/// </summary>
/// <param name="stream">A <see cref="Stream"/> instance containing the HTML document to be processed.</param>
/// <returns>A new <see cref="Document"/> that reads its markup from the stream.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="stream"/> is not readable or not seekable.</exception>
public static Document FromStream(Stream stream)
{
    if (stream == null)
        throw new ArgumentNullException("stream");
    if (!stream.CanRead)
        throw new ArgumentException("Stream must be readable.", "stream");
    if (!stream.CanSeek)
        throw new ArgumentException("Stream must be seekable.", "stream");

    return new Document(stream);
}

#endregion

#region IDisposable Members

/// <summary>
/// Disposes of all unmanaged resources.
/// </summary>
public void Dispose()
{
    this.Dispose(true);
    // Cleanup has already run, so finalization has nothing left to do.
    GC.SuppressFinalize(this);
}

/// <summary>
/// Disposes of all unmanaged resources.
/// </summary>
/// <param name="disposing">Indicates whether the document is being disposed of explicitly; <see cref="Dispose()"/> passes <c>true</c>.</param>
protected virtual void Dispose(bool disposing)
{
    // Repeated calls are no-ops.
    if (this.disposed)
        return;

    // The stream is a managed object, so it is only touched on explicit disposal.
    if (disposing && this.stream != null)
        this.stream.Dispose();

    // The libtidy document is an unmanaged resource and must be released on every path.
    // Releasing it only when disposing is true would leak it if a finalizer calls
    // Dispose(false). Skip the call when there is no handle to release.
    if (this.handle != IntPtr.Zero)
        PInvoke.tidyRelease(this.handle);

    this.handle = IntPtr.Zero;
    this.stream = null;
    this.disposed = true;
}

#endregion
}
}