├── Interop
├── .gitignore
├── TidyEOFFunc.cs
├── TidyGetByteFunc.cs
├── TidyPutByteFunc.cs
├── TidyUngetByteFunc.cs
├── TidyOutputSink.cs
├── TidyInputSource.cs
├── PInvoke.cs
└── TidyOptionId.cs
├── .gitignore
├── TidyManaged.sln
├── SortStrategy.cs
├── RepeatedAttributeMode.cs
├── AutoBool.cs
├── OutputSink.cs
├── NewlineType.cs
├── AccessibilityCheckLevel.cs
├── DocTypeMode.cs
├── InputSource.cs
├── AssemblyInfo.cs
├── TidyManaged.csproj
├── EncodingType.cs
├── README.md
└── Document.cs
/Interop/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | bin
3 | *.userprefs
4 | *.pidb
5 | TestHarness
6 |
--------------------------------------------------------------------------------
/TidyManaged.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 10.00
3 | # Visual Studio 2008
4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TidyManaged", "TidyManaged.csproj", "{D799633D-00EF-437C-B158-315557D930FC}"
5 | EndProject
6 | Global
7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
8 | Debug|Any CPU = Debug|Any CPU
9 | Release|Any CPU = Release|Any CPU
10 | EndGlobalSection
11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
12 | {D799633D-00EF-437C-B158-315557D930FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
13 | {D799633D-00EF-437C-B158-315557D930FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
14 | {D799633D-00EF-437C-B158-315557D930FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
15 | {D799633D-00EF-437C-B158-315557D930FC}.Release|Any CPU.Build.0 = Release|Any CPU
16 | EndGlobalSection
17 | GlobalSection(MonoDevelopProperties) = preSolution
18 | StartupItem = TidyManaged.csproj
19 | EndGlobalSection
20 | GlobalSection(SolutionProperties) = preSolution
21 | HideSolutionNode = FALSE
22 | EndGlobalSection
23 | EndGlobal
24 |
--------------------------------------------------------------------------------
/Interop/TidyEOFFunc.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | // Callback signature of the "eof" member of TidyInputSource; InputSource.OnEOF implements it by returning true once the stream position has reached the stream length. The parameter is named sinkData even though it belongs to an input source.
26 | namespace TidyManaged.Interop
27 | {
28 | internal delegate bool TidyEOFFunc(IntPtr sinkData);
29 | }
30 |
--------------------------------------------------------------------------------
/Interop/TidyGetByteFunc.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | // Callback signature of the "getByte" member of TidyInputSource; InputSource.OnGetByte implements it with Stream.ReadByte. NOTE(review): a byte return value cannot represent an end-of-stream marker - confirm against libtidy's declaration of this callback.
26 | namespace TidyManaged.Interop
27 | {
28 | internal delegate byte TidyGetByteFunc(IntPtr sinkData);
29 | }
30 |
--------------------------------------------------------------------------------
/Interop/TidyPutByteFunc.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | // Callback signature of the "putByte" member of TidyOutputSink; OutputSink.OnPutByte implements it by writing the byte "bt" to the wrapped stream.
26 | namespace TidyManaged.Interop
27 | {
28 | internal delegate void TidyPutByteFunc(IntPtr sinkData, byte bt);
29 | }
30 |
--------------------------------------------------------------------------------
/Interop/TidyUngetByteFunc.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | // Callback signature of the "ungetByte" member of TidyInputSource; InputSource.OnUngetByte implements it by moving the stream position back by one and ignores the value of "bt".
26 | namespace TidyManaged.Interop
27 | {
28 | internal delegate void TidyUngetByteFunc(IntPtr sinkData, byte bt);
29 | }
30 |
--------------------------------------------------------------------------------
/SortStrategy.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | namespace TidyManaged
25 | {
26 | /// <summary>
27 | /// Represents the sort strategies used by some Tidy properties.
28 | /// </summary>
29 | public enum SortStrategy
30 | {
31 | /// <summary>
32 | /// None.
33 | /// </summary>
34 | None,
35 | /// <summary>
36 | /// Alpha (presumably alphabetical ordering; confirm against the Tidy option this maps to).
37 | /// </summary>
38 | Alpha
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/RepeatedAttributeMode.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | namespace TidyManaged
25 | {
26 | /// <summary>
27 | /// Represents the available repeated-attribute handling modes.
28 | /// </summary>
29 | public enum RepeatedAttributeMode
30 | {
31 | /// <summary>
32 | /// Keep the first attribute.
33 | /// </summary>
34 | KeepFirst,
35 |
36 | /// <summary>
37 | /// Keep the last attribute.
38 | /// </summary>
39 | KeepLast
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/AutoBool.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | namespace TidyManaged
25 | {
26 | /// <summary>
27 | /// Represents yes/no/auto values used by some Tidy properties.
28 | /// </summary>
29 | public enum AutoBool
30 | {
31 | /// <summary>
32 | /// No.
33 | /// </summary>
34 | No,
35 | /// <summary>
36 | /// Yes.
37 | /// </summary>
38 | Yes,
39 | /// <summary>
40 | /// Automatic.
41 | /// </summary>
42 | Auto
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/Interop/TidyOutputSink.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | using System.Runtime.InteropServices;
26 | // Output callback table: an opaque data pointer followed by a put-byte function pointer. Presumably mirrors libtidy's native TidyOutputSink struct, so the field order must not change - TODO confirm against tidy's header.
27 | namespace TidyManaged.Interop
28 | {
29 | internal struct TidyOutputSink
30 | {
31 | internal TidyOutputSink(TidyPutByteFunc putByte) // stores the callback; no user data pointer is attached
32 | {
33 | this.sinkData = IntPtr.Zero;
34 | this.putByte = putByte;
35 | }
36 | // The fields below are assigned in the constructor but never read by managed code, hence the suppressed warning CS0414.
37 | #pragma warning disable 0414
38 | IntPtr sinkData; // always IntPtr.Zero (see constructor)
39 | // The delegate is marshalled to native code as a function pointer.
40 | [MarshalAs(UnmanagedType.FunctionPtr)]
41 | TidyPutByteFunc putByte;
42 | #pragma warning restore 0414
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/OutputSink.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | using System.IO;
26 |
27 | namespace TidyManaged
28 | {
29 |     /// <summary>
30 |     /// Exposes a managed <see cref="Stream"/> to Tidy as a callback-driven output sink.
31 |     /// </summary>
32 |     internal class OutputSink
33 |     {
34 |         /// <summary>
35 |         /// Wraps <paramref name="stream"/> and builds the callback structure stored in <see cref="TidyOutputSink"/>.
36 |         /// </summary>
37 |         /// <param name="stream">The stream that receives the output bytes; it must be writable.</param>
38 |         /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
39 |         /// <exception cref="ArgumentException"><paramref name="stream"/> is not writable.</exception>
40 |         internal OutputSink(Stream stream)
41 |         {
42 |             // Validate up front: OnPutByte is reached through a function pointer marshalled to
43 |             // native code, which is a poor place to discover a null or read-only stream.
44 |             if (stream == null) throw new ArgumentNullException("stream");
45 |             if (!stream.CanWrite) throw new ArgumentException("The stream must support writing.", "stream");
46 |
47 |             this.stream = stream;
48 |             this.TidyOutputSink = new Interop.TidyOutputSink(new Interop.TidyPutByteFunc(OnPutByte));
49 |         }
50 |
51 |         readonly Stream stream;
52 |
53 |         /// <summary>
54 |         /// The callback structure for this sink. It holds the only reference to the put-byte
55 |         /// delegate, so this object must stay alive for as long as native code may call it.
56 |         /// </summary>
57 |         internal Interop.TidyOutputSink TidyOutputSink;
58 |
59 |         /// <summary>
60 |         /// Writes a single byte to the wrapped stream.
61 |         /// </summary>
62 |         /// <param name="sinkData">Unused.</param>
63 |         /// <param name="bt">The byte to write.</param>
64 |         void OnPutByte(IntPtr sinkData, byte bt)
65 |         {
66 |             this.stream.WriteByte(bt);
67 |         }
68 |     }
69 | }
70 |
--------------------------------------------------------------------------------
/NewlineType.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | namespace TidyManaged
25 | {
26 | /// <summary>
27 | /// Represents the available newline types.
28 | /// </summary>
29 | public enum NewlineType
30 | {
31 | /// <summary>
32 | /// LF (used by Mac OS X, Unix, Linux).
33 | /// </summary>
34 | Linefeed,
35 |
36 | /// <summary>
37 | /// CRLF (used by Microsoft Windows, DOS etc).
38 | /// </summary>
39 | CarriageReturnLinefeed,
40 |
41 | /// <summary>
42 | /// CR (used by Mac OS 9 and earlier).
43 | /// </summary>
44 | CarriageReturn
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/AccessibilityCheckLevel.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | namespace TidyManaged
25 | {
26 | /// <summary>
27 | /// Represents the available accessibility check levels.
28 | /// </summary>
29 | public enum AccessibilityCheckLevel
30 | {
31 | /// <summary>
32 | /// Equivalent to Tidy Classic's accessibility checking.
33 | /// </summary>
34 | TidyClassic = 0,
35 | /// <summary>
36 | /// Priority 1.
37 | /// </summary>
38 | Priority1 = 1,
39 | /// <summary>
40 | /// Priority 2.
41 | /// </summary>
42 | Priority2 = 2,
43 | /// <summary>
44 | /// Priority 3.
45 | /// </summary>
46 | Priority3 = 3
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/DocTypeMode.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | namespace TidyManaged
25 | {
26 | /// <summary>
27 | /// Represents the available DOCTYPE modes.
28 | /// </summary>
29 | public enum DocTypeMode
30 | {
31 | /// <summary>
32 | /// Omit DOCTYPE altogether.
33 | /// </summary>
34 | Omit,
35 |
36 | /// <summary>
37 | /// Keep DOCTYPE in input. Set version to content.
38 | /// </summary>
39 | Auto,
40 |
41 | /// <summary>
42 | /// Convert document to strict content model.
43 | /// </summary>
44 | Strict,
45 |
46 | /// <summary>
47 | /// Convert document to transitional content model.
48 | /// </summary>
49 | Loose,
50 |
51 | /// <summary>
52 | /// User-specified doctype.
53 | /// </summary>
54 | User
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/Interop/TidyInputSource.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | using System.Runtime.InteropServices;
26 | // Input callback table: an opaque data pointer followed by get-byte, unget-byte and eof function pointers; passed by reference to PInvoke.tidyParseSource. Presumably mirrors libtidy's native TidyInputSource struct, so the field order must not change - TODO confirm against tidy's header.
27 | namespace TidyManaged.Interop
28 | {
29 | internal struct TidyInputSource
30 | {
31 | internal TidyInputSource(TidyGetByteFunc getByte, TidyUngetByteFunc ungetByte, TidyEOFFunc eof) // stores the three callbacks; no user data pointer is attached
32 | {
33 | this.sourceData = IntPtr.Zero;
34 | this.getByte = getByte;
35 | this.ungetByte = ungetByte;
36 | this.eof = eof;
37 | }
38 | // The fields below are assigned in the constructor but never read by managed code, hence the suppressed warning CS0414.
39 | #pragma warning disable 0414
40 | IntPtr sourceData; // always IntPtr.Zero (see constructor)
41 | // Each delegate below is marshalled to native code as a function pointer.
42 | [MarshalAs(UnmanagedType.FunctionPtr)]
43 | TidyGetByteFunc getByte;
44 |
45 | [MarshalAs(UnmanagedType.FunctionPtr)]
46 | TidyUngetByteFunc ungetByte;
47 |
48 | [MarshalAs(UnmanagedType.FunctionPtr)]
49 | TidyEOFFunc eof;
50 | #pragma warning restore 0414
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/InputSource.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | using System.IO;
26 |
27 | namespace TidyManaged
28 | {
29 |     /// <summary>
30 |     /// Exposes a managed <see cref="Stream"/> to Tidy as a callback-driven input source.
31 |     /// </summary>
32 |     internal class InputSource
33 |     {
34 |         /// <summary>
35 |         /// Wraps <paramref name="stream"/> and builds the callback structure stored in <see cref="TidyInputSource"/>.
36 |         /// </summary>
37 |         /// <param name="stream">The stream to read from; it must be readable and seekable.</param>
38 |         /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
39 |         /// <exception cref="ArgumentException"><paramref name="stream"/> cannot be read or does not support seeking.</exception>
40 |         internal InputSource(Stream stream)
41 |         {
42 |             // Validate up front: the callbacks below are reached through function pointers
43 |             // marshalled to native code, and OnUngetByte/OnEOF rely on Position and Length,
44 |             // which are only available on seekable streams.
45 |             if (stream == null) throw new ArgumentNullException("stream");
46 |             if (!stream.CanRead || !stream.CanSeek)
47 |                 throw new ArgumentException("The stream must support reading and seeking.", "stream");
48 |
49 |             this.stream = stream;
50 |             this.TidyInputSource = new Interop.TidyInputSource(
51 |                 new Interop.TidyGetByteFunc(OnGetByte),
52 |                 new Interop.TidyUngetByteFunc(OnUngetByte),
53 |                 new Interop.TidyEOFFunc(OnEOF));
54 |         }
55 |
56 |         readonly Stream stream;
57 |
58 |         /// <summary>
59 |         /// The callback structure for this source. It holds the only references to the callback
60 |         /// delegates, so this object must stay alive for as long as native code may call them.
61 |         /// Left writable because PInvoke.tidyParseSource takes the structure by reference.
62 |         /// </summary>
63 |         internal Interop.TidyInputSource TidyInputSource;
64 |
65 |         /// <summary>
66 |         /// Reads the next byte from the wrapped stream.
67 |         /// </summary>
68 |         /// <param name="sinkData">Unused.</param>
69 |         /// <returns>
70 |         /// The byte read. At end of stream Stream.ReadByte yields -1, which the narrowing cast
71 |         /// turns into 0xFF. NOTE(review): presumably Tidy consults the eof callback before
72 |         /// reading, so this value is never observed - confirm.
73 |         /// </returns>
74 |         byte OnGetByte(IntPtr sinkData)
75 |         {
76 |             // Explicitly unchecked so the end-of-stream value cannot raise an OverflowException
77 |             // when the assembly is compiled with arithmetic overflow checking enabled.
78 |             return unchecked((byte) this.stream.ReadByte());
79 |         }
80 |
81 |         /// <summary>
82 |         /// Pushes the last byte back by moving the stream position back by one, if possible.
83 |         /// </summary>
84 |         /// <param name="sinkData">Unused.</param>
85 |         /// <param name="bt">Ignored; the stream already contains the byte at the previous position.</param>
86 |         void OnUngetByte(IntPtr sinkData, byte bt)
87 |         {
88 |             if (this.stream.Position > 0) this.stream.Position--;
89 |         }
90 |
91 |         /// <summary>
92 |         /// Reports whether the stream position has reached the stream length.
93 |         /// </summary>
94 |         /// <param name="sinkData">Unused.</param>
95 |         /// <returns>true when there are no more bytes to read; otherwise false.</returns>
96 |         bool OnEOF(IntPtr sinkData)
97 |         {
98 |             return (this.stream.Position >= this.stream.Length);
99 |         }
100 |     }
101 | }
102 |
--------------------------------------------------------------------------------
/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System.Reflection;
25 | using System.Runtime.CompilerServices;
26 | using System.Runtime.InteropServices;
27 |
28 | [assembly: AssemblyTitle("TidyManaged")]
29 | [assembly: AssemblyDescription("Managed .NET wrapper for the HTML Tidy library")]
30 | [assembly: AssemblyConfiguration("")]
31 | [assembly: AssemblyCompany("Mark Beaton")]
32 | [assembly: AssemblyProduct("TidyManaged")]
33 | [assembly: AssemblyCopyright("Copyright © Mark Beaton 2009")]
34 | [assembly: AssemblyTrademark("")]
35 | [assembly: AssemblyCulture("")]
36 |
37 | // Setting ComVisible to false makes the types in this assembly not visible
38 | // to COM components. If you need to access a type in this assembly from
39 | // COM, set the ComVisible attribute to true on that type.
40 | [assembly: ComVisible(false)]
41 |
42 | // The following GUID is for the ID of the typelib if this project is exposed to COM
43 | [assembly: Guid("1c09c222-dbe1-44b3-8983-d4116ec3e051")]
44 |
45 | // Version information for an assembly consists of the following four values:
46 | //
47 | // Major Version
48 | // Minor Version
49 | // Build Number
50 | // Revision
51 | //
52 | // You can specify all the values, or default the Build Number and Revision by using
53 | // '*' (for example "1.0.*"). Both versions below specify all four values explicitly.
54 | [assembly: AssemblyVersion("1.0.0.0")]
55 | [assembly: AssemblyFileVersion("1.0.0.0")]
56 |
--------------------------------------------------------------------------------
/TidyManaged.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | AnyCPU
6 | 9.0.30729
7 | 2.0
8 | {D799633D-00EF-437C-B158-315557D930FC}
9 | Library
10 | Properties
11 | TidyManaged
12 | TidyManaged
13 | v2.0
14 | 512
15 |
16 |
17 | true
18 | full
19 | false
20 | bin\Debug\
21 | TRACE;DEBUG;SUPPORT_UTF16_ENCODINGS;SUPPORT_ASIAN_ENCODINGS;
22 | prompt
23 | 4
24 |
25 |
26 | none
27 | true
28 | bin\Release\
29 | TRACE;SUPPORT_UTF16_ENCODINGS;SUPPORT_ASIAN_ENCODINGS;
30 | prompt
31 | 4
32 | bin\Release\TidyManaged.xml
33 | true
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
69 |
--------------------------------------------------------------------------------
/Interop/PInvoke.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | using System.Runtime.InteropServices;
26 |
27 | namespace TidyManaged.Interop
28 | {
29 | internal class PInvoke // P/Invoke declarations binding the managed wrapper to the native "libtidy.dll" library.
30 | {
31 | [DllImport("libtidy.dll")]
32 | internal static extern IntPtr tidyCreate(); // The returned pointer is the native document handle passed as `tdoc` to the calls below.
33 |
34 | [DllImport("libtidy.dll")]
35 | internal static extern void tidyRelease(IntPtr tdoc); // Presumably frees the document created by tidyCreate - confirm against tidy.h.
36 |
37 | [DllImport("libtidy.dll")]
38 | internal static extern IntPtr tidyReleaseDate(); // Returns a pointer to a native string; callers convert it with Marshal.PtrToStringAnsi.
39 |
40 | [DllImport("libtidy.dll")]
41 | internal static extern IntPtr tidyOptGetValue(IntPtr tdoc, TidyOptionId optId); // Raw native string pointer; see tidyOptGetValueString for the managed form.
42 |
43 | [DllImport("libtidy.dll")]
44 | internal static extern bool tidyOptSetValue(IntPtr tdoc, TidyOptionId optId, string val);
45 |
46 | [DllImport("libtidy.dll")]
47 | internal static extern uint tidyOptGetInt(IntPtr tdoc, TidyOptionId optId); // NOTE(review): tidy.h is believed to type this value as C `ulong`; confirm `uint` matches its width on every target platform.
48 |
49 | [DllImport("libtidy.dll")]
50 | internal static extern bool tidyOptSetInt(IntPtr tdoc, TidyOptionId optId, uint val); // NOTE(review): same width question as tidyOptGetInt.
51 |
52 | [DllImport("libtidy.dll")]
53 | internal static extern bool tidyOptGetBool(IntPtr tdoc, TidyOptionId optId);
54 |
55 | [DllImport("libtidy.dll")]
56 | internal static extern bool tidyOptSetBool(IntPtr tdoc, TidyOptionId optId, bool val);
57 |
58 | [DllImport("libtidy.dll")]
59 | internal static extern int tidyParseFile(IntPtr tdoc, string filename);
60 |
61 | [DllImport("libtidy.dll")]
62 | internal static extern int tidyParseString(IntPtr tdoc, string content); // NOTE(review): no CharSet is specified, so the string is marshalled with the default (ANSI) rules - confirm non-ASCII markup survives.
63 |
64 | [DllImport("libtidy.dll")]
65 | internal static extern int tidyParseSource(IntPtr tdoc, ref TidyInputSource source);
66 |
67 | [DllImport("libtidy.dll")]
68 | internal static extern int tidyCleanAndRepair(IntPtr tdoc);
69 |
70 | [DllImport("libtidy.dll")]
71 | internal static extern int tidySaveFile(IntPtr tdoc, string filname); // NOTE(review): `filname` looks like a typo for `filename`.
72 |
73 | [DllImport("libtidy.dll")]
74 | internal static extern int tidySaveString(IntPtr tdoc, IntPtr buffer, ref uint buflen);
75 |
76 | [DllImport("libtidy.dll")]
77 | internal static extern int tidySaveSink(IntPtr tdoc, ref TidyOutputSink sink);
78 |
79 | internal static string tidyOptGetValueString(IntPtr tdoc, TidyOptionId optId) // Managed convenience wrapper around tidyOptGetValue.
80 | {
81 | return Marshal.PtrToStringAnsi(tidyOptGetValue(tdoc, optId)); // Copies the native string into a managed string, decoding it as ANSI text.
82 | }
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/EncodingType.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | namespace TidyManaged
25 | {
26 | /// <summary>
27 | /// Represents the supported encodings.
28 | /// </summary>
29 | public enum EncodingType
30 | {
31 | /// <summary>
32 | /// No or unknown encoding.
33 | /// </summary>
34 | Raw = 0,
35 |
36 | /// <summary>
37 | /// The American Standard Code for Information Interchange (ASCII) encoding scheme.
38 | /// </summary>
39 | Ascii = 1,
40 |
41 | /// <summary>
42 | /// The ISO/IEC 8859-15 encoding scheme, also known as Latin-0 and Latin-9.
43 | /// </summary>
44 | Latin0 = 2,
45 |
46 | /// <summary>
47 | /// The ISO/IEC 8859-1 encoding scheme, also known as Latin-1.
48 | /// </summary>
49 | Latin1 = 3,
50 |
51 | /// <summary>
52 | /// The UTF-8 encoding scheme.
53 | /// </summary>
54 | Utf8 = 4,
55 |
56 | /// <summary>
57 | /// The ISO/IEC 2022 encoding scheme.
58 | /// </summary>
59 | Iso2022 = 5,
60 |
61 | /// <summary>
62 | /// The MacRoman encoding scheme.
63 | /// </summary>
64 | MacRoman = 6,
65 |
66 | /// <summary>
67 | /// The Windows-1252 encoding scheme.
68 | /// </summary>
69 | Win1252 = 7,
70 |
71 | /// <summary>
72 | /// The Code page 858 encoding scheme, also known as CP 858, IBM 858, or OEM 858.
73 | /// </summary>
74 | Ibm858 = 8,
75 |
76 | #if SUPPORT_UTF16_ENCODINGS
77 |
78 | /// <summary>
79 | /// The UTF-16LE (Little Endian) encoding scheme.
80 | /// </summary>
81 | Utf16LittleEndian = 9,
82 |
83 | /// <summary>
84 | /// The UTF-16BE (Big Endian) encoding scheme.
85 | /// </summary>
86 | Utf16BigEndian = 10,
87 |
88 | /// <summary>
89 | /// The UTF-16 encoding scheme, with endianness detected using a BOM.
90 | /// </summary>
91 | Utf16 = 11,
92 |
93 | #endif
94 |
95 | #if SUPPORT_ASIAN_ENCODINGS
96 | #if SUPPORT_UTF16_ENCODINGS
97 |
98 | /// <summary>
99 | /// The Big-5 or Big5 encoding scheme, used in Taiwan, Hong Kong, and Macau for Traditional Chinese characters.
100 | /// </summary>
101 | Big5 = 12,
102 |
103 | /// <summary>
104 | /// The Shift JIS encoding scheme for Japanese characters.
105 | /// </summary>
106 | ShiftJIS = 13
107 |
108 | #else
109 |
110 | /// <summary>
111 | /// The Big-5 or Big5 encoding scheme, used in Taiwan, Hong Kong, and Macau for Traditional Chinese characters.
112 | /// </summary>
113 | Big5 = 9,
114 |
115 | /// <summary>
116 | /// The Shift JIS encoding scheme for Japanese characters.
117 | /// </summary>
118 | ShiftJIS = 10
119 |
120 | #endif
121 | #endif
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TidyManaged
2 |
3 | This is a managed .NET/Mono wrapper for the open source, cross-platform Tidy library, an HTML/XHTML/XML markup parser & cleaner originally created by Dave Raggett.
4 |
5 | I'm not going to explain Tidy's "raison d'être" - please read [Dave Raggett's original web page](http://www.w3.org/People/Raggett/tidy/) for more information, or the [SourceForge project](http://tidy.sourceforge.net/) that has taken over maintenance of the library.
6 |
7 | ## libtidy
8 |
9 | This wrapper is written in C#, and makes use of .NET platform invoke (p/invoke) functionality to interoperate with the Tidy library "libtidy" (written in portable ANSI C).
10 |
11 | Therefore, you'll also need a build of the binary appropriate for your platform. If you're after a 32 or 64 bit Windows build, or you want a more recent build for Mac OS X than the one that is bundled with the OS, try these:
12 |
13 | - [Windows 32-bit build](http://wemakeapps.net/downloads/TidyManaged/libtidy.dll.Win32.zip)
14 | - [Windows 64-bit build](http://wemakeapps.net/downloads/TidyManaged/libtidy.dll.Win64.zip)
15 | - [Mac x64/x86/PPC fat binary](http://wemakeapps.net/downloads/TidyManaged/libtidy.dylib.zip) - this is a newer build (25 March 2009) than the version included in default OS X installations.
16 |
17 | Otherwise, grab the latest source from the [SourceForge project](http://tidy.sourceforge.net/), and roll your own.
18 |
19 | ## Sample Usage
20 |
21 | Here's a quick'n'dirty example using a simple console app.
22 | Note: always remember to .Dispose() of your Document instance (or wrap it in a "using" statement), so the interop layer can clean up any unmanaged resources (memory, file handles etc) when it's done cleaning.
23 |
24 | using System;
25 | using TidyManaged;
26 |
27 | public class Test
28 | {
29 | public static void Main(string[] args)
30 | {
31 | using (Document doc = Document.FromString("<hTml><title>test</tootle><body>asd</body>"))
32 | {
33 | doc.ShowWarnings = false;
34 | doc.Quiet = true;
35 | doc.OutputXhtml = true;
36 | doc.CleanAndRepair();
37 | string parsed = doc.Save();
38 | Console.WriteLine(parsed);
39 | }
40 | }
41 | }
42 |
43 | results in:
44 |
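45 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
46 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
47 | <html xmlns="http://www.w3.org/1999/xhtml">
48 | <head>
49 | <meta name="generator" content=
50 | "HTML Tidy for Mac OS X (vers 31 October 2006 - Apple Inc. build 13), see www.w3.org" />
51 | <title>test</title>
52 | </head>
53 | <body>
54 | asd
55 | </body>
56 | </html>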
57 |
58 | ## Notes for non-Windows platforms
59 |
60 | Thanks to the platform-agnostic nature of ANSI C, and the excellent work of the people at the [Mono Project](http://www.mono-project.com/), you can use this wrapper library anywhere that Mono is supported, assuming you have (or can build) a version of the underlying Tidy library for your platform. That shouldn't be too hard - it's a default part of a standard Mac OS X install, for example; it probably is for most Linux distributions as well.
61 |
62 | Under Mono, you might need to re-map the p/invoke calls to the appropriate library - or you might find it just works. See [this page on DLL mapping](http://www.mono-project.com/Config_DllMap) for more information on achieving this. Note: the .config file needs to be configured for the TidyManaged DLL, NOT your application's binary.
63 |
64 | ### Example TidyManaged.dll.config
65 |
66 | <configuration>
67 | <dllmap dll="libtidy.dll" target="libtidy.dylib"/>
68 | </configuration>
69 |
70 | ## The API
71 |
72 | At this stage I've just created a basic mapping of each of the configuration options made available by Tidy to properties of the main Document object - I've renamed a few things here & there, but it should be pretty easy to figure out what each property does (the documentation included in the code includes the original Tidy option name for each property). You can read the [Tidy configuration documentation here](http://tidy.sourceforge.net/docs/quickref.html).
73 |
74 | ## The Future
75 |
76 | At some point I'll add a nicer ".NET-style" API layer over the top, as it's a bit clunky (although perfectly usable) at the moment.
77 |
--------------------------------------------------------------------------------
/Interop/TidyOptionId.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 |
26 | namespace TidyManaged.Interop
27 | {
28 | internal enum TidyOptionId // Option identifiers passed as `optId` to the tidyOpt* P/Invoke calls. Members are numbered implicitly in declaration order; the *NotUsed placeholders in the #else branches keep later values unchanged when a SUPPORT_* symbol is undefined.
29 | {
30 | TidyUnknownOption, /*< Unknown option! */
31 | TidyIndentSpaces, /*< Indentation n spaces */
32 | TidyWrapLen, /*< Wrap margin */
33 | TidyTabSize, /*< Expand tabs to n spaces */
34 | TidyCharEncoding, /*< In/out character encoding */
35 | TidyInCharEncoding, /*< Input character encoding (if different) */
36 | TidyOutCharEncoding, /*< Output character encoding (if different) */
37 | TidyNewline, /*< Output line ending (default to platform) */
38 | TidyDoctypeMode, /*< See doctype property */
39 | TidyDoctype, /*< User specified doctype */
40 | TidyDuplicateAttrs, /*< Keep first or last duplicate attribute */
41 | TidyAltText, /*< Default text for alt attribute */
42 |
43 | [Obsolete]
44 | TidySlideStyle, /*< Style sheet for slides: not used for anything yet */
45 |
46 | TidyErrFile, /*< File name to write errors to */
47 | TidyOutFile, /*< File name to write markup to */
48 | TidyWriteBack, /*< If true then output tidied markup */
49 | TidyShowMarkup, /*< If false, normal output is suppressed */
50 | TidyShowWarnings, /*< However errors are always shown */
51 | TidyQuiet, /*< No 'Parsing X', guessed DTD or summary */
52 | TidyIndentContent, /*< Indent content of appropriate tags */
53 | /*< "auto" does text/block level content indentation */
54 | TidyHideEndTags, /*< Suppress optional end tags */
55 | TidyXmlTags, /*< Treat input as XML */
56 | TidyXmlOut, /*< Create output as XML */
57 | TidyXhtmlOut, /*< Output extensible HTML */
58 | TidyHtmlOut, /*< Output plain HTML, even for XHTML input.
59 | Yes means set explicitly. */
60 | TidyXmlDecl, /*< Add <?xml?> for XML docs */
61 | TidyUpperCaseTags, /*< Output tags in upper not lower case */
62 | TidyUpperCaseAttrs, /*< Output attributes in upper not lower case */
63 | TidyMakeBare, /*< Make bare HTML: remove Microsoft cruft */
64 | TidyMakeClean, /*< Replace presentational clutter by style rules */
65 | TidyLogicalEmphasis, /*< Replace i by em and b by strong */
66 | TidyDropPropAttrs, /*< Discard proprietary attributes */
67 | TidyDropFontTags, /*< Discard presentation tags */
68 | TidyDropEmptyParas, /*< Discard empty p elements */
69 | TidyFixComments, /*< Fix comments with adjacent hyphens */
70 | TidyBreakBeforeBR, /*< Output newline before <br> or not? */
71 |
72 | [Obsolete]
73 | TidyBurstSlides, /*< Create slides on each h2 element */
74 |
75 | TidyNumEntities, /*< Use numeric entities */
76 | TidyQuoteMarks, /*< Output " marks as &quot; */
77 | TidyQuoteNbsp, /*< Output non-breaking space as entity */
78 | TidyQuoteAmpersand, /*< Output naked ampersand as &amp; */
79 | TidyWrapAttVals, /*< Wrap within attribute values */
80 | TidyWrapScriptlets, /*< Wrap within JavaScript string literals */
81 | TidyWrapSection, /*< Wrap within section tags */
82 | TidyWrapAsp, /*< Wrap within ASP pseudo elements */
83 | TidyWrapJste, /*< Wrap within JSTE pseudo elements */
84 | TidyWrapPhp, /*< Wrap within PHP pseudo elements */
85 | TidyFixBackslash, /*< Fix URLs by replacing \ with / */
86 | TidyIndentAttributes,/*< Newline+indent before each attribute */
87 | TidyXmlPIs, /*< If set to yes PIs must end with ?> */
88 | TidyXmlSpace, /*< If set to yes adds xml:space attr as needed */
89 | TidyEncloseBodyText, /*< If yes text at body is wrapped in P's */
90 | TidyEncloseBlockText,/*< If yes text in blocks is wrapped in P's */
91 | TidyKeepFileTimes, /*< If yes last modified time is preserved */
92 | TidyWord2000, /*< Draconian cleaning for Word2000 */
93 | TidyMark, /*< Add meta element indicating tidied doc */
94 | TidyEmacs, /*< If true format error output for GNU Emacs */
95 | TidyEmacsFile, /*< Name of current Emacs file */
96 | TidyLiteralAttribs, /*< If true attributes may use newlines */
97 | TidyBodyOnly, /*< Output BODY content only */
98 | TidyFixUri, /*< Applies URI encoding if necessary */
99 | TidyLowerLiterals, /*< Folds known attribute values to lower case */
100 | TidyHideComments, /*< Hides all (real) comments in output */
101 | TidyIndentCdata, /*< Indent <![CDATA[ ... ]]> section */
102 | TidyForceOutput, /*< Output document even if errors were found */
103 | TidyShowErrors, /*< Number of errors to put out */
104 | TidyAsciiChars, /*< Convert quotes and dashes to nearest ASCII char */
105 | TidyJoinClasses, /*< Join multiple class attributes */
106 | TidyJoinStyles, /*< Join multiple style attributes */
107 | TidyEscapeCdata, /*< Replace <![CDATA[]]> sections with escaped text */
108 | #if SUPPORT_ASIAN_ENCODINGS
109 | TidyLanguage, /*< Language property: not used for anything yet */
110 | TidyNCR, /*< Allow numeric character references */
111 | #else
112 | TidyLanguageNotUsed,
113 | TidyNCRNotUsed,
114 | #endif
115 | #if SUPPORT_UTF16_ENCODINGS
116 | TidyOutputBOM, /**< Output a Byte Order Mark (BOM) for UTF-16 encodings */
117 | /**< auto: if input stream has BOM, we output a BOM */
118 | #else
119 | TidyOutputBOMNotUsed,
120 | #endif
121 | TidyReplaceColor, /*< Replace hex color attribute values with names */
122 | TidyCSSPrefix, /*< CSS class naming for -clean option */
123 | TidyInlineTags, /*< Declared inline tags */
124 | TidyBlockTags, /*< Declared block tags */
125 | TidyEmptyTags, /*< Declared empty tags */
126 | TidyPreTags, /*< Declared pre tags */
127 | TidyAccessibilityCheckLevel, /*< Accessibility check level
128 | 0 (old style), or 1, 2, 3 */
129 | TidyVertSpace, /*< degree to which markup is spread out vertically */
130 | #if SUPPORT_ASIAN_ENCODINGS
131 | TidyPunctWrap, /*< consider punctuation and breaking spaces for wrapping */
132 | #else
133 | TidyPunctWrapNotUsed,
134 | #endif
135 | TidyMergeDivs, /*< Merge multiple DIVs */
136 | TidyDecorateInferredUL, /*< Mark inferred UL elements with no indent CSS */
137 | TidyPreserveEntities, /*< Preserve entities */
138 | TidySortAttributes, /*< Sort attributes */
139 | TidyMergeSpans, /*< Merge multiple SPANs */
140 | TidyAnchorAsName, /*< Define anchors as name attributes */
141 | N_TIDY_OPTIONS /*< Must be last */
142 | }
143 | }
144 |
--------------------------------------------------------------------------------
/Document.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009 Mark Beaton
2 | //
3 | // Permission is hereby granted, free of charge, to any person
4 | // obtaining a copy of this software and associated documentation
5 | // files (the "Software"), to deal in the Software without
6 | // restriction, including without limitation the rights to use,
7 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the
9 | // Software is furnished to do so, subject to the following
10 | // conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be
13 | // included in all copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | // OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | using System;
25 | using System.Collections.Generic;
26 | using System.Diagnostics;
27 | using System.Globalization;
28 | using System.IO;
29 | using System.Runtime.InteropServices;
30 | using System.Text;
31 | using TidyManaged.Interop;
32 |
33 | namespace TidyManaged
34 | {
35 | /// <summary>
36 | /// Represents an HTML document (or XML, XHTML) to be processed by Tidy.
37 | /// </summary>
38 | public class Document : IDisposable
39 | {
40 | #region Constructors
41 |
42 | Document() // Base constructor: creates the native Tidy document and keeps its handle.
43 | {
44 | this.handle = PInvoke.tidyCreate(); // The handle is what every PInvoke.tidyOpt* call in this class receives.
45 | this.disposed = false;
46 | }
47 |
48 | Document(string htmlString) // Creates a document whose input is the given in-memory markup string.
49 | : this()
50 | {
51 | this.htmlString = htmlString;
52 | this.fromString = true; // Marks the input as coming from a string rather than a stream.
53 | }
54 |
55 |
56 | Document(Stream stream) // Creates a document whose input is the given stream; the stream is only stored here, not read.
57 | : this()
58 | {
59 | this.stream = stream;
60 | }
61 |
62 | #endregion
63 |
64 | #region Fields
65 |
66 | IntPtr handle; // Native Tidy document handle obtained from PInvoke.tidyCreate().
67 | Stream stream; // Input stream, set by the Stream constructor.
68 | string htmlString; // Input markup, set by the string constructor.
69 | bool fromString; // True when the document was constructed from a string.
70 | bool disposed; // Initialised to false by the base constructor.
71 | bool cleaned; // NOTE(review): presumably records that CleanAndRepair has run - confirm against the rest of the class.
72 |
73 | #endregion
74 |
75 | #region Properties
76 |
77 | DateTime? _ReleaseDate; // Per-instance cache of the parsed release date; null until the property is first read.
78 | static readonly object releaseDateLock = new object(); // NOTE(review): the lock is static while the cache above is an instance field.
79 | /// <summary>
80 | /// Gets the release date of the underlying Tidy library.
81 | /// </summary>
82 | public DateTime ReleaseDate
83 | {
84 | get
85 | {
86 | lock (releaseDateLock)
87 | {
88 | if (!_ReleaseDate.HasValue)
89 | {
90 | DateTime val = DateTime.MinValue; // Cached as-is when the native string is null or has fewer than three tokens.
91 | string release = Marshal.PtrToStringAnsi(PInvoke.tidyReleaseDate());
92 | if (release != null)
93 | {
94 | string[] tokens = release.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
95 | if (tokens.Length >= 3)
96 | {
97 | DateTime.TryParseExact(tokens[0] + " " + tokens[1] + " " + tokens[2], "d MMMM yyyy", CultureInfo.InvariantCulture, DateTimeStyles.None, out val); // Only the first three tokens are parsed; the boolean result is not checked.
98 | }
99 | }
100 | _ReleaseDate = val; // Stored whether or not parsing succeeded, so the native call happens once per instance.
101 | }
102 | return _ReleaseDate.Value;
103 | }
104 | }
105 | }
106 |
107 | #region HTML, XHTML, XML Options
108 |
109 | /// <summary>
110 | /// [add-xml-decl] Gets or sets whether Tidy should add the XML declaration when outputting XML or XHTML. Note that if the input already includes an &lt;?xml ... ?&gt; declaration then this option will be ignored. If the encoding for the output is different from "ascii", one of the utf encodings or "raw", the declaration is always added as required by the XML standard. Defaults to false.
111 | /// </summary>
112 | public bool AddXmlDeclaration
113 | {
114 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyXmlDecl); }
115 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyXmlDecl, value); }
116 | }
117 |
118 | /// <summary>
119 | /// [add-xml-space] Gets or sets whether Tidy should add xml:space="preserve" to elements such as &lt;PRE&gt;, &lt;STYLE&gt; and &lt;SCRIPT&gt; when generating XML. This is needed if the whitespace in such elements is to be parsed appropriately without having access to the DTD. Defaults to false.
120 | /// </summary>
121 | public bool AddXmlSpacePreserve
122 | {
123 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyXmlSpace); }
124 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyXmlSpace, value); }
125 | }
126 |
127 | /// <summary>
128 | /// [alt-text] Gets or sets the default "alt=" text Tidy uses for &lt;IMG&gt; attributes. This feature is dangerous as it suppresses further accessibility warnings. You are responsible for making your documents accessible to people who can not see the images!
129 | /// </summary>
130 | public string DefaultAltText
131 | {
132 | get { return PInvoke.tidyOptGetValueString(this.handle, TidyOptionId.TidyAltText); }
133 | set { PInvoke.tidyOptSetValue(this.handle, TidyOptionId.TidyAltText, value); }
134 | }
135 |
136 | /// <summary>
137 | /// [anchor-as-name] Gets or sets the deletion or addition of the name attribute in elements where it can serve as anchor. If set to true, a name attribute, if not already existing, is added along an existing id attribute if the DTD allows it. If set to false, any existing name attribute is removed if an id attribute exists or has been added. Defaults to true.
138 | /// </summary>
139 | public bool AnchorAsName
140 | {
141 | // Not available in Tidy releases dated before 18 Jun 2008; on older libraries the option is neither read nor written.
142 | get
143 | {
144 | if (this.ReleaseDate < new DateTime(2008, 6, 18))
145 | {
146 | Trace.WriteLine("AnchorAsName is not supported by your version of tidylib - ignoring.");
147 | return true; // Reports the documented default instead of querying the library.
148 | }
149 | return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyAnchorAsName);
150 | }
151 | set
152 | {
153 | if (this.ReleaseDate < new DateTime(2008, 6, 18))
154 | Trace.WriteLine("AnchorAsName is not supported by your version of tidylib - ignoring.");
155 | else
156 | PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyAnchorAsName, value);
157 | }
158 | }
159 |
160 | /// <summary>
161 | /// [assume-xml-procins] Gets or sets whether Tidy should change the parsing of processing instructions to require ?&gt; as the terminator rather than &gt;. This option is automatically set if the input is in XML. Defaults to false.
162 | /// </summary>
163 | public bool ChangeXmlProcessingInstructions
164 | {
165 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyXmlPIs); }
166 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyXmlPIs, value); }
167 | }
168 |
169 | /// <summary>
170 | /// [bare] Gets or sets whether Tidy should strip Microsoft specific HTML from Word 2000 documents, and output spaces rather than non-breaking spaces where they exist in the input. Defaults to false.
171 | /// </summary>
172 | public bool MakeBare
173 | {
174 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyMakeBare); }
175 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyMakeBare, value); }
176 | }
177 |
178 | /// <summary>
179 | /// [clean] Gets or sets whether Tidy should strip out surplus presentational tags and attributes replacing them by style rules and structural markup as appropriate. It works well on the HTML saved by Microsoft Office products. Defaults to false.
180 | /// </summary>
181 | public bool MakeClean
182 | {
183 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyMakeClean); }
184 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyMakeClean, value); }
185 | }
186 |
187 | /// <summary>
188 | /// [css-prefix] Gets or sets the prefix that Tidy uses for style rules. By default, "c" will be used.
189 | /// </summary>
190 | public string CssPrefix
191 | {
192 | get { return PInvoke.tidyOptGetValueString(this.handle, TidyOptionId.TidyCSSPrefix); }
193 | set { PInvoke.tidyOptSetValue(this.handle, TidyOptionId.TidyCSSPrefix, value); }
194 | }
195 |
196 | /// <summary>
197 | /// [decorate-inferred-ul] Gets or sets whether Tidy should decorate inferred UL elements with some CSS markup to avoid indentation to the right. Defaults to false.
198 | /// </summary>
199 | public bool DecorateInferredUL
200 | {
201 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyDecorateInferredUL); }
202 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyDecorateInferredUL, value); }
203 | }
204 |
205 | /// <summary>
206 | /// [doctype] Gets or sets the DOCTYPE declaration generated by Tidy. If set to "Omit" the output won't contain a DOCTYPE declaration. If set to "Auto" (the default) Tidy will use an educated guess based upon the contents of the document. If set to "Strict", Tidy will set the DOCTYPE to the strict DTD. If set to "Loose", the DOCTYPE is set to the loose (transitional) DTD. Alternatively, you can supply a string for the formal public identifier (FPI).
207 | ///
208 | /// For example:
209 | /// doctype: "-//ACME//DTD HTML 3.14159//EN"
210 | ///
211 | /// If you specify the FPI for an XHTML document, Tidy will set the system identifier to an empty string. For an HTML document, Tidy adds a system identifier only if one was already present in order to preserve the processing mode of some browsers. Tidy leaves the DOCTYPE for generic XML documents unchanged. "Omit" implies OutputNumericEntities = true. This option does not offer a validation of the document conformance.
212 | /// </summary>
213 | public DocTypeMode DocType
214 | {
215 | get { return (DocTypeMode) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyDoctypeMode); }
216 | set { PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyDoctypeMode, (uint) value); }
217 | }
218 |
219 | /// <summary>
220 | /// [drop-empty-paras] Gets or sets whether Tidy should discard empty paragraphs. Defaults to true.
221 | /// </summary>
222 | public bool DropEmptyParagraphs
223 | {
224 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyDropEmptyParas); }
225 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyDropEmptyParas, value); }
226 | }
227 |
228 | /// <summary>
229 | /// [drop-font-tags] Gets or sets whether Tidy should discard &lt;FONT&gt; and &lt;CENTER&gt; tags without creating the corresponding style rules. This option can be set independently of the MakeClean option. Defaults to false.
230 | /// </summary>
231 | public bool DropFontTags
232 | {
233 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyDropFontTags); }
234 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyDropFontTags, value); }
235 | }
236 |
237 | /// <summary>
238 | /// [drop-proprietary-attributes] Gets or sets whether Tidy should strip out proprietary attributes, such as MS data binding attributes. Defaults to false.
239 | /// </summary>
240 | public bool DropProprietaryAttributes
241 | {
242 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyDropPropAttrs); }
243 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyDropPropAttrs, value); }
244 | }
245 |
246 | /// <summary>
247 | /// [enclose-block-text] Gets or sets whether Tidy should insert a &lt;P&gt; element to enclose any text it finds in any element that allows mixed content for HTML transitional but not HTML strict. Defaults to false.
248 | /// </summary>
249 | public bool EncloseBlockText
250 | {
251 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyEncloseBlockText); }
252 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyEncloseBlockText, value); }
253 | }
254 |
255 | /// <summary>
256 | /// [enclose-text] Gets or sets whether Tidy should enclose any text it finds in the body element within a &lt;P&gt; element. This is useful when you want to take existing HTML and use it with a style sheet. Defaults to false.
257 | /// </summary>
258 | public bool EncloseBodyText
259 | {
260 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyEncloseBodyText); }
261 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyEncloseBodyText, value); }
262 | }
263 |
264 | /// <summary>
265 | /// [escape-cdata] Gets or sets whether Tidy should convert &lt;![CDATA[]]&gt; sections to normal text. Defaults to false.
266 | /// </summary>
267 | public bool EscapeCdata
268 | {
269 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyEscapeCdata); }
270 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyEscapeCdata, value); }
271 | }
272 |
273 | /// <summary>
274 | /// [fix-backslash] Gets or sets whether Tidy should replace backslash characters "\" in URLs with forward slashes "/". Defaults to true.
275 | /// </summary>
276 | public bool FixUrlBackslashes
277 | {
278 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyFixBackslash); }
279 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyFixBackslash, value); }
280 | }
281 |
282 | /// <summary>
283 | /// [fix-bad-comments] Gets or sets whether Tidy should replace unexpected hyphens with "=" characters when it comes across adjacent hyphens. This option is provided for users of Cold Fusion which uses the comment syntax: &lt;!--- ---&gt;. Defaults to true.
284 | /// </summary>
285 | public bool FixBadComments
286 | {
287 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyFixComments); }
288 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyFixComments, value); }
289 | }
290 |
291 | /// <summary>
292 | /// [fix-uri] Gets or sets whether Tidy should check attribute values that carry URIs for illegal characters and if such are found, escape them as HTML 4 recommends. Defaults to true.
293 | /// </summary>
294 | public bool FixAttributeUris
295 | {
296 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyFixUri); }
297 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyFixUri, value); }
298 | }
299 |
300 | /// <summary>
301 | /// [hide-comments] Gets or sets whether Tidy should remove comments from the output (true hides them, false prints them). Defaults to false.
302 | /// </summary>
303 | public bool RemoveComments
304 | {
305 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyHideComments); }
306 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyHideComments, value); }
307 | }
308 |
309 | /// <summary>
310 | /// [hide-endtags] Gets or sets whether Tidy should omit optional end-tags when generating the pretty printed markup. This option is ignored if you are outputting to XML. Defaults to false.
311 | /// </summary>
312 | public bool RemoveEndTags
313 | {
314 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyHideEndTags); }
315 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyHideEndTags, value); }
316 | }
317 |
318 | /// <summary>
319 | /// [indent-cdata] Gets or sets whether Tidy should indent &lt;![CDATA[]]&gt; sections. Defaults to false.
320 | /// </summary>
321 | public bool IndentCdata
322 | {
323 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyIndentCdata); }
324 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyIndentCdata, value); }
325 | }
326 |
327 | /// <summary>
328 | /// [input-xml] Gets or sets whether Tidy should use the XML parser rather than the error correcting HTML parser. Defaults to false.
329 | /// </summary>
330 | public bool UseXmlParser
331 | {
332 | get { return PInvoke.tidyOptGetBool(this.handle, TidyOptionId.TidyXmlTags); }
333 | set { PInvoke.tidyOptSetBool(this.handle, TidyOptionId.TidyXmlTags, value); }
334 | }
335 |
336 | /// <summary>
337 | /// [join-classes] Gets or sets whether Tidy should combine class names into a single new class name when it detects multiple class assignments on an element. Defaults to false.
338 | /// </summary>
339 | public bool JoinClasses
340 | {
341 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyJoinClasses); }
342 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyJoinClasses, value); }
343 | }
344 |
345 | /// <summary>
346 | /// [join-styles] Gets or sets whether Tidy should combine styles into a single new style when it detects multiple style values on an element. Defaults to true.
347 | /// </summary>
348 | public bool JoinStyles
349 | {
350 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyJoinStyles); }
351 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyJoinStyles, value); }
352 | }
353 |
354 | /// <summary>
355 | /// [literal-attributes] Gets or sets whether Tidy should pass whitespace characters inside attribute values through unchanged. Defaults to false.
356 | /// </summary>
357 | public bool EnsureLiteralAttributes
358 | {
359 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyLiteralAttribs); }
360 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyLiteralAttribs, value); }
361 | }
362 |
363 | /// <summary>
364 | /// [logical-emphasis] Gets or sets whether Tidy should replace every &lt;I&gt; with &lt;EM&gt; and every &lt;B&gt; with &lt;STRONG&gt;, preserving the attributes unchanged in both cases. This option can be set independently of the "MakeClean" and "DropFontTags" properties. Defaults to false.
365 | /// </summary>
366 | public bool UseLogicalEmphasis
367 | {
368 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyLogicalEmphasis); }
369 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyLogicalEmphasis, value); }
370 | }
371 |
372 | /// <summary>
373 | /// [lower-literals] Gets or sets whether Tidy should convert to lower case the value of any attribute that takes a list of predefined values. This is required for XHTML documents. Defaults to false.
374 | /// </summary>
375 | public bool LowerCaseLiterals
376 | {
377 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyLowerLiterals); }
378 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyLowerLiterals, value); }
379 | }
380 |
381 | /// <summary>
382 | /// [merge-divs] Gets or sets whether Tidy should merge nested &lt;div&gt; elements such as "&lt;div&gt;&lt;div&gt;...&lt;/div&gt;&lt;/div&gt;". With "Auto", the attributes of the inner &lt;div&gt; are moved to the outer one, and nested &lt;div&gt; elements with ID attributes are not merged. With "Yes", the attributes of the inner &lt;div&gt; are discarded, except for "class" and "style". Can be used to modify the behavior of the "MakeClean" option. Defaults to Auto.
383 | /// </summary>
384 | public AutoBool MergeDivs
385 | {
386 |     get { return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyMergeDivs); }
387 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyMergeDivs, (uint) value); }
388 | }
389 |
390 | /// <summary>
391 | /// [merge-spans] Gets or sets whether Tidy should merge nested &lt;span&gt; elements such as "&lt;span&gt;&lt;span&gt;...&lt;/span&gt;&lt;/span&gt;". The algorithm is identical to the one used by MergeDivs. Can be used to modify the behavior of the "MakeClean" option. Defaults to "Auto".
392 | /// </summary>
393 | public AutoBool MergeSpans
394 | {
395 |     // tidylib releases dated before 13 Aug 2007 are treated as not having this option.
396 |     get
397 |     {
398 |         bool predatesOption = ReleaseDate < new DateTime(2007, 8, 13);
399 |         if (!predatesOption)
400 |             return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyMergeSpans);
401 |
402 |         Trace.WriteLine("MergeSpans is not supported by your version of tidylib - ignoring.");
403 |         return AutoBool.No;
404 |     }
405 |     set
406 |     {
407 |         // On an older library the assignment is dropped and only a trace message is written.
408 |         bool predatesOption = ReleaseDate < new DateTime(2007, 8, 13);
409 |         if (predatesOption) Trace.WriteLine("MergeSpans is not supported by your version of tidylib - ignoring.");
410 |         else PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyMergeSpans, (uint) value);
411 |     }
412 | }
413 |
414 | #if SUPPORT_ASIAN_ENCODINGS
415 | /// <summary>
416 | /// [ncr] Gets or sets whether Tidy should allow numeric character references. Defaults to true.
417 | /// </summary>
418 | public bool AllowNumericCharacterReferences
419 | {
420 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyNCR); }
421 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyNCR, value); }
422 | }
423 | #endif
424 |
425 | /// <summary>
426 | /// [new-blocklevel-tags] Gets or sets the new block-level tags, given as a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. Note that you can't change the content model for elements such as &lt;TABLE&gt;, &lt;UL&gt;, &lt;OL&gt; and &lt;DL&gt;. This option is ignored in XML mode.
427 | /// </summary>
428 | public string NewBlockLevelTags
429 | {
430 |     get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyBlockTags); }
431 |     set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyBlockTags, value); }
432 | }
433 |
434 | /// <summary>
435 | /// [new-empty-tags] Gets or sets the new empty inline tags, given as a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. This option is ignored in XML mode.
436 | /// </summary>
437 | public string NewEmptyInlineTags
438 | {
439 |     get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyEmptyTags); }
440 |     set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyEmptyTags, value); }
441 | }
442 |
443 | /// <summary>
444 | /// [new-inline-tags] Gets or sets the new non-empty inline tags, given as a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. This option is ignored in XML mode.
445 | /// </summary>
446 | public string NewInlineTags
447 | {
448 |     get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyInlineTags); }
449 |     set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyInlineTags, value); }
450 | }
451 |
452 | /// <summary>
453 | /// [new-pre-tags] Gets or sets the new tags that are to be processed in exactly the same way as HTML's &lt;PRE&gt; element, given as a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. Note that you cannot as yet add new CDATA elements (similar to &lt;SCRIPT&gt;). This option is ignored in XML mode.
454 | /// </summary>
455 | public string NewPreTags
456 | {
457 |     get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyPreTags); }
458 |     set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyPreTags, value); }
459 | }
460 |
461 | /// <summary>
462 | /// [numeric-entities] Gets or sets whether Tidy should output entities other than the built-in HTML entities (&amp;amp;, &amp;lt;, &amp;gt; and &amp;quot;) in the numeric rather than the named entity form. Only entities compatible with the generated DOCTYPE declaration are used. Entities that can be represented in the output encoding are translated correspondingly. Defaults to false.
463 | /// </summary>
464 | public bool OutputNumericEntities
465 | {
466 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyNumEntities); }
467 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyNumEntities, value); }
468 | }
469 |
470 | /// <summary>
471 | /// [output-html] Gets or sets whether Tidy should generate pretty printed output and write it as HTML. Defaults to false.
472 | /// </summary>
473 | public bool OutputHtml
474 | {
475 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyHtmlOut); }
476 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyHtmlOut, value); }
477 | }
478 |
479 | /// <summary>
480 | /// [output-xhtml] Gets or sets whether Tidy should generate pretty printed output and write it as extensible HTML. This option causes Tidy to set the DOCTYPE and default namespace as appropriate to XHTML. If a DOCTYPE or namespace is given, it will be checked for consistency with the content of the document; in the case of an inconsistency, the corrected values will appear in the output. For XHTML, entities can be written as named or numeric entities according to the "OutputNumericEntities" setting. The original case of tags and attributes will be preserved, regardless of other options. Defaults to false.
481 | /// </summary>
482 | public bool OutputXhtml
483 | {
484 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyXhtmlOut); }
485 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyXhtmlOut, value); }
486 | }
487 |
488 | /// <summary>
489 | /// [output-xml] Gets or sets whether Tidy should generate pretty printed output and write it as well-formed XML. Any entities not defined in XML 1.0 will be written as numeric entities so that an XML parser can parse them. The original case of tags and attributes will be preserved, regardless of other options. Defaults to false.
490 | /// </summary>
491 | public bool OutputXml
492 | {
493 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyXmlOut); }
494 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyXmlOut, value); }
495 | }
496 |
497 | /// <summary>
498 | /// [preserve-entities] Gets or sets whether Tidy should preserve well-formed entities as found in the input. Defaults to false.
499 | /// </summary>
500 | public bool PreserveEntities
501 | {
502 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyPreserveEntities); }
503 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyPreserveEntities, value); }
504 | }
505 |
506 | /// <summary>
507 | /// [quote-ampersand] Gets or sets whether Tidy should output unadorned &amp; characters as &amp;amp;. Defaults to true.
508 | /// </summary>
509 | public bool QuoteAmpersands
510 | {
511 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyQuoteAmpersand); }
512 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyQuoteAmpersand, value); }
513 | }
514 |
515 | /// <summary>
516 | /// [quote-marks] Gets or sets whether Tidy should output " characters as &amp;quot;, as is preferred by some editing environments. The apostrophe character ' is written out as &amp;#39; since many web browsers don't yet support &amp;apos;. Defaults to false.
517 | /// </summary>
518 | public bool QuoteMarks
519 | {
520 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyQuoteMarks); }
521 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyQuoteMarks, value); }
522 | }
523 |
524 | /// <summary>
525 | /// [quote-nbsp] Gets or sets whether Tidy should output non-breaking space characters as entities rather than as the Unicode character value 160 (decimal). Defaults to true.
526 | /// </summary>
527 | public bool QuoteNonBreakingSpaces
528 | {
529 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyQuoteNbsp); }
530 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyQuoteNbsp, value); }
531 | }
532 |
533 | /// <summary>
534 | /// [repeated-attributes] Gets or sets whether Tidy should keep the first or the last occurrence when an attribute is repeated, e.g. an element has two align attributes. Defaults to "KeepLast".
535 | /// </summary>
536 | public RepeatedAttributeMode RepeatedAttributeMode
537 | {
538 |     get { return (RepeatedAttributeMode) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyDuplicateAttrs); }
539 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyDuplicateAttrs, (uint) value); }
540 | }
541 |
542 | /// <summary>
543 | /// [replace-color] Gets or sets whether Tidy should replace numeric values in color attributes with HTML/XHTML color names where defined, e.g. replace "#ffffff" with "white". Defaults to false.
544 | /// </summary>
545 | public bool UseColorNames
546 | {
547 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyReplaceColor); }
548 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyReplaceColor, value); }
549 | }
550 |
551 | /// <summary>
552 | /// [show-body-only] Gets or sets whether Tidy should print only the contents of the body tag as an HTML fragment. If set to "Auto", this is performed only if the body tag has been inferred. Useful for incorporating existing whole pages as a portion of another page. This option has no effect if XML output is requested. Defaults to "No".
553 | /// </summary>
554 | public AutoBool OutputBodyOnly
555 | {
556 |     // The option changed from a Bool to an AutoBool on 24 May 2007; older releases go through the boolean accessors.
557 |     get
558 |     {
559 |         bool boolOnlyLibrary = ReleaseDate < new DateTime(2007, 5, 24);
560 |         if (!boolOnlyLibrary)
561 |             return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyBodyOnly);
562 |         return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyBodyOnly) ? AutoBool.Yes : AutoBool.No;
563 |     }
564 |     set
565 |     {
566 |         // On the boolean path only AutoBool.Yes maps to true; every other value is stored as false.
567 |         bool boolOnlyLibrary = ReleaseDate < new DateTime(2007, 5, 24);
568 |         if (!boolOnlyLibrary) PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyBodyOnly, (uint) value);
569 |         else PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyBodyOnly, value == AutoBool.Yes);
570 |     }
571 | }
572 |
573 | /// <summary>
574 | /// [uppercase-attributes] Gets or sets whether Tidy should output attribute names in upper case. The default is false, which results in lower case attribute names, except for XML input, where the original case is preserved.
575 | /// </summary>
576 | public bool UpperCaseAttributes
577 | {
578 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyUpperCaseAttrs); }
579 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyUpperCaseAttrs, value); }
580 | }
581 |
582 | /// <summary>
583 | /// [uppercase-tags] Gets or sets whether Tidy should output tag names in upper case. The default is false, which results in lower case tag names, except for XML input, where the original case is preserved.
584 | /// </summary>
585 | public bool UpperCaseTags
586 | {
587 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyUpperCaseTags); }
588 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyUpperCaseTags, value); }
589 | }
590 |
591 | /// <summary>
592 | /// [word-2000] Gets or sets whether Tidy should go to great pains to strip out all the surplus stuff Microsoft Word 2000 inserts when you save Word documents as "Web pages". It doesn't handle embedded images or VML. You should consider using Word's "Save As: Web Page, Filtered". Defaults to false.
593 | /// </summary>
594 | public bool CleanWord2000
595 | {
596 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWord2000); }
597 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWord2000, value); }
598 | }
599 |
600 | #endregion
601 |
602 | #region Diagnostics Options
603 |
604 | /// <summary>
605 | /// [accessibility-check] Gets or sets the level of accessibility checking, if any, that Tidy should perform. Defaults to TidyClassic.
606 | /// </summary>
607 | public AccessibilityCheckLevel AccessibilityCheckLevel
608 | {
609 |     get { return (AccessibilityCheckLevel) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyAccessibilityCheckLevel); }
610 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyAccessibilityCheckLevel, (uint) value); }
611 | }
612 |
613 | /// <summary>
614 | /// [show-errors] Gets or sets the number Tidy uses to determine whether further errors should be shown. If set to 0, no errors are shown. Negative values are stored as 0. Defaults to 6.
615 | /// </summary>
616 | public int MaximumErrors
617 | {
618 |     get { return (int) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyShowErrors); }
619 |     set
620 |     {
621 |         uint limit = (value < 0) ? 0u : (uint) value; // a negative limit is clamped to zero
622 |         PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyShowErrors, limit);
623 |     }
624 | }
625 |
626 | /// <summary>
627 | /// [show-warnings] Gets or sets whether Tidy should report warnings. Setting this to false suppresses them, which can be useful when a few errors are hidden in a flurry of warnings. Defaults to true.
628 | /// </summary>
629 | public bool ShowWarnings
630 | {
631 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyShowWarnings); }
632 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyShowWarnings, value); }
633 | }
634 |
635 | #endregion
636 |
637 | #region Pretty Print Options
638 |
639 | /// <summary>
640 | /// [break-before-br] Gets or sets whether Tidy should output a line break before each &lt;BR&gt; element. Defaults to false.
641 | /// </summary>
642 | public bool LineBreakBeforeBR
643 | {
644 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyBreakBeforeBR); }
645 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyBreakBeforeBR, value); }
646 | }
647 |
648 | /// <summary>
649 | /// [indent] Gets or sets whether Tidy should indent block-level tags. If set to Auto, Tidy decides whether or not to indent the content of tags such as TITLE, H1-H6, LI, TD, or P depending on whether or not the content includes a block-level element. You are advised to avoid setting indent to Yes, as this can expose layout bugs in some browsers. Defaults to No.
650 | /// </summary>
651 | public AutoBool IndentBlockElements
652 | {
653 |     get { return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyIndentContent); }
654 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyIndentContent, (uint) value); }
655 | }
656 |
657 | /// <summary>
658 | /// [indent-attributes] Gets or sets whether Tidy should begin each attribute on a new line. Defaults to false.
659 | /// </summary>
660 | public bool IndentAttributes
661 | {
662 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyIndentAttributes); }
663 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyIndentAttributes, value); }
664 | }
665 |
666 | /// <summary>
667 | /// [indent-spaces] Gets or sets the number of spaces Tidy uses to indent content, when indentation is enabled. Negative values are stored as 0 (the same clamping MaximumErrors applies) rather than being reinterpreted by the cast to uint. Defaults to 2.
668 | /// </summary>
669 | public int IndentSpaces
670 | {
671 |     get { return (int) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyIndentSpaces); }
672 |     set { PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyIndentSpaces, (uint) Math.Max(value, 0)); }
673 | }
674 |
675 | /// <summary>
676 | /// [markup] Gets or sets whether Tidy should generate a pretty printed version of the markup. Note that Tidy won't generate a pretty printed version if it finds significant errors (see ForceOutput). Defaults to true.
677 | /// </summary>
678 | public bool Markup
679 | {
680 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyShowMarkup); }
681 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyShowMarkup, value); }
682 | }
683 |
684 | #if SUPPORT_ASIAN_ENCODINGS
685 | /// <summary>
686 | /// [punctuation-wrap] Gets or sets whether Tidy should line wrap after some Unicode or Chinese punctuation characters. Defaults to false.
687 | /// </summary>
688 | public bool PunctuationWrap
689 | {
690 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyPunctWrap); }
691 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyPunctWrap, value); }
692 | }
693 | #endif
694 |
695 | /// <summary>
696 | /// [sort-attributes] Gets or sets how Tidy should sort attributes within an element, using the specified sort algorithm. If set to Alpha, the algorithm is an ascending alphabetic sort. Defaults to None.
697 | /// </summary>
698 | public SortStrategy AttributeSortType
699 | {
700 |     // tidylib releases dated before 12 Jun 2007 (the date checked below) are treated as not having this option.
701 |     get
702 |     {
703 |         bool predatesOption = ReleaseDate < new DateTime(2007, 6, 12);
704 |         if (!predatesOption)
705 |             return (SortStrategy) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidySortAttributes);
706 |
707 |         Trace.WriteLine("AttributeSortType is not supported by your version of tidylib - ignoring.");
708 |         return SortStrategy.None;
709 |     }
710 |     set
711 |     {
712 |         // On an older library the assignment is dropped and only a trace message is written.
713 |         bool predatesOption = ReleaseDate < new DateTime(2007, 6, 12);
714 |         if (predatesOption) Trace.WriteLine("AttributeSortType is not supported by your version of tidylib - ignoring.");
715 |         else PInvoke.tidyOptSetInt(handle, TidyOptionId.TidySortAttributes, (uint) value);
716 |     }
717 | }
718 |
719 | /// <summary>
720 | /// [tab-size] Gets or sets the number of columns that Tidy uses between successive tab stops. It is used to map tabs to spaces when reading the input. Tidy never outputs tabs. Negative values are stored as 0 (the same clamping MaximumErrors applies) rather than being reinterpreted by the cast to uint. Defaults to 8.
721 | /// </summary>
722 | public int TabSize
723 | {
724 |     get { return (int) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyTabSize); }
725 |     set { PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyTabSize, (uint) Math.Max(value, 0)); }
726 | }
727 |
728 | /// <summary>
729 | /// [vertical-space] Gets or sets whether Tidy should add some empty lines for readability. Defaults to false.
730 | /// </summary>
731 | public bool AddVerticalSpace
732 | {
733 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyVertSpace); }
734 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyVertSpace, value); }
735 | }
736 |
737 | /// <summary>
738 | /// [wrap] Gets or sets the right margin Tidy uses for line wrapping. Tidy tries to wrap lines so that they do not exceed this length. Set wrap to zero if you want to disable line wrapping. Negative values are stored as 0 (the same clamping MaximumErrors applies) rather than being reinterpreted by the cast to uint. Defaults to 68.
739 | /// </summary>
740 | public int WrapAt
741 | {
742 |     get { return (int) PInvoke.tidyOptGetInt(this.handle, TidyOptionId.TidyWrapLen); }
743 |     set { PInvoke.tidyOptSetInt(this.handle, TidyOptionId.TidyWrapLen, (uint) Math.Max(value, 0)); }
744 | }
745 |
746 | /// <summary>
747 | /// [wrap-asp] Gets or sets whether Tidy should line wrap text contained within ASP pseudo elements, which look like: &lt;% ... %&gt;. Defaults to true.
748 | /// </summary>
749 | public bool WrapAsp
750 | {
751 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapAsp); }
752 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapAsp, value); }
753 | }
754 |
755 | /// <summary>
756 | /// [wrap-attributes] Gets or sets whether Tidy should line wrap attribute values, for easier editing. This option can be set independently of WrapScriptLiterals. Defaults to false.
757 | /// </summary>
758 | public bool WrapAttributeValues
759 | {
760 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapAttVals); }
761 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapAttVals, value); }
762 | }
763 |
764 | /// <summary>
765 | /// [wrap-jste] Gets or sets whether Tidy should line wrap text contained within JSTE pseudo elements, which look like: &lt;# ... #&gt;. Defaults to true.
766 | /// </summary>
767 | public bool WrapJste
768 | {
769 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapJste); }
770 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapJste, value); }
771 | }
772 |
773 | /// <summary>
774 | /// [wrap-php] Gets or sets whether Tidy should line wrap text contained within PHP pseudo elements, which look like: &lt;?php ... ?&gt;. Defaults to true.
775 | /// </summary>
776 | public bool WrapPhp
777 | {
778 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapPhp); }
779 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapPhp, value); }
780 | }
781 |
782 | /// <summary>
783 | /// [wrap-script-literals] Gets or sets whether Tidy should line wrap string literals that appear in script attributes. Tidy wraps long script string literals by inserting a backslash character before the line break. Defaults to false.
784 | /// </summary>
785 | public bool WrapScriptLiterals
786 | {
787 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapScriptlets); }
788 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapScriptlets, value); }
789 | }
790 |
791 | /// <summary>
792 | /// [wrap-sections] Gets or sets whether Tidy should line wrap text contained within &lt;![ ... ]&gt; section tags. Defaults to true.
793 | /// </summary>
794 | public bool WrapSections
795 | {
796 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWrapSection); }
797 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWrapSection, value); }
798 | }
799 |
800 | #endregion
801 |
802 | #region Character Encoding Options
803 |
804 | /// <summary>
805 | /// [ascii-chars] Gets or sets whether &amp;emdash;, &amp;rdquo;, and other named character entities are downgraded to their closest ascii equivalents when the "MakeClean" option is set to true. Defaults to false.
806 | /// </summary>
807 | public bool AsciiEntities
808 | {
809 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyAsciiChars); }
810 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyAsciiChars, value); }
811 | }
812 |
813 | /// <summary>
814 | /// [char-encoding] Gets or sets the character encoding Tidy uses for both the input and the output. For ascii, Tidy will accept Latin-1 (ISO-8859-1) character values, but will use entities for all characters whose value is above 127. For raw, Tidy will output values above 127 without translating them into entities. For latin1, characters above 255 will be written as entities. For utf8, Tidy assumes that both input and output are encoded as UTF-8. You can use iso2022 for files encoded using the ISO-2022 family of encodings, e.g. ISO-2022-JP. For mac and win1252, Tidy will accept vendor specific character values, but will use entities for all characters whose value is above 127. For unsupported encodings, use an external utility to convert to and from UTF-8. Defaults to "Ascii".
815 | /// </summary>
816 | public EncodingType CharacterEncoding
817 | {
818 |     get { return (EncodingType) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyCharEncoding); }
819 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyCharEncoding, (uint) value); }
820 | }
821 |
822 | /// <summary>
823 | /// [input-encoding] Gets or sets the character encoding Tidy uses for the input. See CharacterEncoding for more info. Defaults to "Latin1".
824 | /// </summary>
825 | public EncodingType InputCharacterEncoding
826 | {
827 |     get { return (EncodingType) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyInCharEncoding); }
828 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyInCharEncoding, (uint) value); }
829 | }
830 |
831 | /// <summary>
832 | /// [newline] Gets or sets the type of newline. The default is appropriate to the current platform: CRLF on PC-DOS, MS-Windows and OS/2, CR on Classic Mac OS, and LF everywhere else (Unix and Linux).
833 | /// </summary>
834 | public NewlineType NewLine
835 | {
836 |     get { return (NewlineType) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyNewline); }
837 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyNewline, (uint) value); }
838 | }
839 |
840 | #if SUPPORT_UTF16_ENCODINGS
841 | /// <summary>
842 | /// [output-bom] Gets or sets whether Tidy should write a Unicode Byte Order Mark character (BOM; also known as Zero Width No-Break Space; has the value U+FEFF) to the beginning of the output; only for UTF-8 and UTF-16 output encodings. If set to "Auto", Tidy writes a BOM to the output only if a BOM was present at the beginning of the input. A BOM is always written for XML/XHTML output using UTF-16 output encodings. Defaults to "Auto".
843 | /// </summary>
844 | public AutoBool OutputByteOrderMark
845 | {
846 |     get { return (AutoBool) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyOutputBOM); }
847 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyOutputBOM, (uint) value); }
848 | }
849 | #endif
850 |
851 | /// <summary>
852 | /// [output-encoding] Gets or sets the character encoding Tidy uses for the output. See CharacterEncoding for more info. May only be different from input-encoding for Latin encodings (ascii, latin0, latin1, mac, win1252, ibm858). Defaults to "Ascii".
853 | /// </summary>
854 | public EncodingType OutputCharacterEncoding
855 | {
856 |     get { return (EncodingType) PInvoke.tidyOptGetInt(handle, TidyOptionId.TidyOutCharEncoding); }
857 |     set { PInvoke.tidyOptSetInt(handle, TidyOptionId.TidyOutCharEncoding, (uint) value); }
858 | }
859 |
860 | #endregion
861 |
862 | #region Miscellaneous Options
863 |
864 | /// <summary>
865 | /// [error-file] Gets or sets the error file Tidy uses for errors and warnings. Normally errors and warnings are output to "stderr". Defaults to null.
866 | /// </summary>
867 | public string ErrorFile
868 | {
869 |     get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyErrFile); }
870 |     set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyErrFile, value); }
871 | }
872 |
873 | /// <summary>
874 | /// [force-output] Gets or sets whether Tidy should produce output even if errors are encountered. Use this option with care: if Tidy reports an error, it was not able to, or is not sure how to, fix the error, so the resulting output may not reflect your intention. Defaults to false.
875 | /// </summary>
876 | public bool ForceOutput
877 | {
878 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyForceOutput); }
879 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyForceOutput, value); }
880 | }
881 |
882 | /// <summary>
883 | /// [gnu-emacs] Gets or sets whether Tidy should report errors and warnings in a format that is more easily parsed by GNU Emacs. Defaults to false.
884 | /// </summary>
885 | public bool UseGnuEmacsErrorFormat
886 | {
887 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyEmacs); }
888 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyEmacs, value); }
889 | }
890 |
891 | /// <summary>
892 | /// [keep-time] Gets or sets whether Tidy should keep the original modification time of files that it modifies in place. Setting the option to true allows you to tidy files without causing these files to be uploaded to a web server when using a tool such as SiteCopy. Note that this feature is not supported on some platforms. Defaults to false.
893 | /// </summary>
894 | public bool KeepModificationTimestamp
895 | {
896 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyKeepFileTimes); }
897 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyKeepFileTimes, value); }
898 | }
899 |
900 | /// <summary>
901 | /// [output-file] Gets or sets the output file Tidy uses for markup. Normally markup is written to "stdout". Defaults to null.
902 | /// </summary>
903 | public string OutputFile
904 | {
905 |     get { return PInvoke.tidyOptGetValueString(handle, TidyOptionId.TidyOutFile); }
906 |     set { PInvoke.tidyOptSetValue(handle, TidyOptionId.TidyOutFile, value); }
907 | }
908 |
909 | /// <summary>
910 | /// [quiet] Gets or sets whether Tidy should suppress the summary of the numbers of errors and warnings, as well as the welcome and informational messages. Defaults to false.
911 | /// </summary>
912 | public bool Quiet
913 | {
914 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyQuiet); }
915 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyQuiet, value); }
916 | }
917 |
918 | /// <summary>
919 | /// [tidy-mark] Gets or sets whether Tidy should add a meta element to the document head to indicate that the document has been tidied. Tidy won't add a meta element if one is already present. Defaults to true.
920 | /// </summary>
921 | public bool AddTidyMetaElement
922 | {
923 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyMark); }
924 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyMark, value); }
925 | }
926 |
927 | /// <summary>
928 | /// [write-back] Gets or sets whether Tidy should write the tidied markup back to the same file it read from. You are advised to keep copies of important files before tidying them, as on rare occasions the result may not be what you expect. Defaults to false.
929 | /// </summary>
930 | public bool WriteBack
931 | {
932 |     get { return PInvoke.tidyOptGetBool(handle, TidyOptionId.TidyWriteBack); }
933 |     set { PInvoke.tidyOptSetBool(handle, TidyOptionId.TidyWriteBack, value); }
934 | }
935 |
936 | #endregion
937 |
938 | #endregion
939 |
940 | #region Methods
941 |
942 | /// <summary>
943 | /// Parses the input markup (from the source string or the source stream) and then runs the configured cleanup and repair operations on it.
944 | /// </summary>
945 | public void CleanAndRepair()
946 | {
947 |     if (!fromString)
948 |     {
949 |         InputSource source = new InputSource(stream);
950 |         PInvoke.tidyParseSource(handle, ref source.TidyInputSource);
951 |     }
952 |     else
953 |     {
954 |         EncodingType callerEncoding = InputCharacterEncoding; // put back once the string has been parsed as UTF-8
955 |         InputCharacterEncoding = EncodingType.Utf8;
956 |         PInvoke.tidyParseString(handle, htmlString);
957 |         InputCharacterEncoding = callerEncoding;
958 |     }
959 |     PInvoke.tidyCleanAndRepair(handle);
960 |     cleaned = true;
961 | }
962 |
963 | /// <summary>
964 | /// Saves the processed markup to a string. The markup is produced as UTF-8 with the byte order mark switched off; the caller's output encoding and BOM settings are restored before returning, even if the native call throws.
965 | /// </summary>
966 | /// <returns>A string containing the processed markup.</returns>
967 | public string Save()
968 | {
969 |     if (!cleaned)
970 |         throw new InvalidOperationException("CleanAndRepair() must be called before Save().");
971 |     // Snapshot the output encoding (the option overwritten below), not CharacterEncoding, so the restore is exact.
972 |     EncodingType tempEnc = this.OutputCharacterEncoding;
973 |     AutoBool tempBOM = this.OutputByteOrderMark; // NOTE(review): declared only under SUPPORT_UTF16_ENCODINGS - confirm that symbol is always defined
974 |     this.OutputByteOrderMark = AutoBool.No;
975 |     uint bufferLength = 1;
976 |     byte[] htmlBytes;
977 |     GCHandle pinned = new GCHandle();
978 |     try
979 |     {
980 |         do
981 |         {
982 |             // On a retry the previous buffer was too small and bufferLength should now hold the required length.
983 |             if (pinned.IsAllocated) pinned.Free();
984 |             // This setting appears to be reset by libtidy after calling tidySaveString, so it is set before every call.
985 |             this.OutputCharacterEncoding = EncodingType.Utf8;
986 |             htmlBytes = new byte[bufferLength];
987 |             pinned = GCHandle.Alloc(htmlBytes, GCHandleType.Pinned);
988 |         } while (PInvoke.tidySaveString(this.handle, pinned.AddrOfPinnedObject(), ref bufferLength) == -12); // -12: buffer too small
989 |     }
990 |     finally // Runs even if the native call throws: unpin the buffer and restore the caller's options.
991 |     {
992 |         if (pinned.IsAllocated) pinned.Free();
993 |         this.OutputCharacterEncoding = tempEnc;
994 |         this.OutputByteOrderMark = tempBOM;
995 |     }
996 |     return Encoding.UTF8.GetString(htmlBytes, 0, (int) Math.Min(bufferLength, (uint) htmlBytes.Length)); // decode only the reported length
997 | }
998 |
/// <summary>
/// Saves the processed markup to a file.
/// </summary>
/// <param name="filePath">The full filesystem path of the file to save the markup to.</param>
/// <exception cref="ObjectDisposedException">The document has already been disposed.</exception>
/// <exception cref="InvalidOperationException">CleanAndRepair() has not been called yet.</exception>
/// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
public void Save(string filePath)
{
    // Dispose() zeroes the native handle; calling into Tidy with it is not valid.
    if (this.disposed)
        throw new ObjectDisposedException(this.GetType().FullName);
    if (!cleaned)
        throw new InvalidOperationException("CleanAndRepair() must be called before Save().");

    // Reject a null path here instead of handing it to native code.
    if (filePath == null)
        throw new ArgumentNullException("filePath");

    PInvoke.tidySaveFile(this.handle, filePath);
}
1010 |
/// <summary>
/// Saves the processed markup to the supplied stream.
/// </summary>
/// <param name="stream">A <see cref="Stream"/> to write the markup to.</param>
/// <exception cref="ObjectDisposedException">The document has already been disposed.</exception>
/// <exception cref="InvalidOperationException">CleanAndRepair() has not been called yet.</exception>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="stream"/> is not writable.</exception>
public void Save(Stream stream)
{
    // Dispose() zeroes the native handle; calling into Tidy with it is not valid.
    if (this.disposed)
        throw new ObjectDisposedException(this.GetType().FullName);
    if (!cleaned)
        throw new InvalidOperationException("CleanAndRepair() must be called before Save().");

    // Validate the target the same way FromStream validates its source.
    if (stream == null)
        throw new ArgumentNullException("stream");
    if (!stream.CanWrite)
        throw new ArgumentException("Stream must be writable.");

    // For string-backed documents the output encoding is forced to UTF-8 while saving;
    // the caller's setting is put back even if the save call throws.
    EncodingType tempEnc = this.OutputCharacterEncoding;
    if (fromString) this.OutputCharacterEncoding = EncodingType.Utf8;
    try
    {
        OutputSink sink = new OutputSink(stream);
        PInvoke.tidySaveSink(this.handle, ref sink.TidyOutputSink);
    }
    finally
    {
        if (fromString) this.OutputCharacterEncoding = tempEnc;
    }
}
1026 |
1027 | #endregion
1028 |
1029 | #region Static Methods
1030 |
/// <summary>
/// Creates a new <see cref="Document"/> instance from a <see cref="string"/> containing HTML.
/// </summary>
/// <param name="htmlString">The HTML string to be processed.</param>
/// <returns>A new <see cref="Document"/> wrapping the supplied markup.</returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlString"/> is null.</exception>
public static Document FromString(string htmlString)
{
    if (htmlString != null)
    {
        return new Document(htmlString);
    }

    // Only reached for a null argument.
    throw new ArgumentNullException("htmlString");
}
1042 |
/// <summary>
/// Creates a new <see cref="Document"/> instance from a file.
/// </summary>
/// <param name="filePath">The full filesystem path of the HTML document to be processed.</param>
/// <returns>A new <see cref="Document"/> reading from the opened file.</returns>
/// <exception cref="FileNotFoundException">No file exists at <paramref name="filePath"/>.</exception>
public static Document FromFile(string filePath)
{
    if (!File.Exists(filePath))
        throw new FileNotFoundException("File not found.", filePath);

    // Open for reading only: the two-argument FileStream constructor requests read/write
    // access, which fails on read-only files even though the document only reads the stream.
    FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
    try
    {
        return new Document(fileStream);
    }
    catch
    {
        // The document never took ownership of the stream, so close it here before rethrowing.
        fileStream.Dispose();
        throw;
    }
}
1054 |
/// <summary>
/// Creates a new <see cref="Document"/> instance from a <see cref="Stream"/> instance.
/// </summary>
/// <param name="stream">A <see cref="Stream"/> instance containing the HTML document to be processed.</param>
/// <returns>A new <see cref="Document"/> reading from the supplied stream.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="stream"/> is not readable or not seekable.</exception>
public static Document FromStream(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    // Readability is checked before seekability, so a stream lacking both reports "readable".
    if (stream.CanRead == false)
    {
        throw new ArgumentException("Stream must be readable.");
    }

    if (stream.CanSeek == false)
    {
        throw new ArgumentException("Stream must be seekable.");
    }

    return new Document(stream);
}
1070 |
1071 | #endregion
1072 |
1073 | #region IDisposable Members
1074 |
/// <summary>
/// Releases the resources held by this document.
/// </summary>
public void Dispose()
{
    // Run the full cleanup path (disposing == true).
    Dispose(true);

    // Cleanup has already happened, so ask the GC not to run finalization for this instance.
    GC.SuppressFinalize(this);
}
1083 |
/// <summary>
/// Releases the resources held by this document. Safe to call more than once.
/// </summary>
/// <param name="disposing">
/// true when called from <see cref="Dispose()"/>, in which case the managed stream is disposed
/// as well; false when invoked from any other path (conventionally a finalizer), in which case
/// only the native Tidy handle is released.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (this.disposed)
        return;

    if (disposing)
    {
        // Other managed objects are only touched on the explicit Dispose() path.
        if (this.stream != null) this.stream.Dispose();
    }

    // The native handle is an unmanaged resource and must be released on every path;
    // releasing it only when disposing is true leaks it on the non-disposing path.
    if (this.handle != IntPtr.Zero)
    {
        PInvoke.tidyRelease(this.handle);
        this.handle = IntPtr.Zero;
    }

    this.stream = null;
    this.disposed = true;
}
1102 |
1103 | #endregion
1104 | }
1105 | }
1106 |
--------------------------------------------------------------------------------