())
205 | {
206 | content.Append(ProcessParagraph(element, mainPart));
207 | }
208 | return content.ToString().Replace("\n", "
").Trim();
209 | })
210 | .ToList();
211 | }
212 | }
213 | }
214 |
215 |
--------------------------------------------------------------------------------
/MarkItDown/Helpers/CustomMarkdownConverter.cs:
--------------------------------------------------------------------------------
1 | // Helpers/CustomMarkdownConverter.cs
2 |
3 | using System;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 | using System.Text;
7 | using HtmlAgilityPack;
8 | using ReverseMarkdown;
9 | using ReverseMarkdown.Converters;
10 |
11 | namespace MarkItDownSharp.Helpers
12 | {
13 | public class CustomMarkdownConverter
14 | {
15 | /// <summary>
16 | /// Converts an HTML string to Markdown.
17 | /// </summary>
18 | /// <param name="html">The HTML string.</param>
19 | /// <returns>Markdown as a string.</returns>
20 | public string ConvertToMarkdown(string html)
21 | {
22 | // Load the HTML into an HtmlDocument
23 | var doc = new HtmlDocument();
24 | doc.LoadHtml(html);
25 |
26 | // Remove unwanted nodes (script, style, and macro buttons)
27 | RemoveNodes(doc, "//script|//style|//button[contains(@class, 'conf-macro')]");
28 |
29 | // Remove <img> nodes whose src attribute starts with data:image
30 | var imageNodes = doc.DocumentNode.SelectNodes("//img");
31 | if (imageNodes != null)
32 | {
33 | foreach (var img in imageNodes)
34 | {
35 | var src = img.GetAttributeValue("src", string.Empty);
36 | if (!string.IsNullOrEmpty(src) && src.StartsWith("data:image/"))
37 | img.Remove();
38 | }
39 | }
40 |
41 | // Preprocess: Fix situations where block-level elements (like div or table)
42 | // are nested inside <p> tags and unwrap them.
43 | FixInvalidParagraphs(doc);
44 |
45 | // Preprocess: remove unnecessary attributes from nodes (to help the converters)
46 | SanitizeNodes(doc);
47 |
48 | // Get the cleaned-up HTML string
49 | var cleanedHtml = doc.DocumentNode.OuterHtml;
50 |
51 | // Configure ReverseMarkdown
52 | var config = new Config
53 | {
54 | UnknownTags = Config.UnknownTagsOption.Drop,
55 | GithubFlavored = true,
56 | RemoveComments = true,
57 | SmartHrefHandling = true,
58 | CleanupUnnecessarySpaces = true
59 | };
60 |
61 | var converter = new Converter(config);
62 |
63 | // Register custom converters to enhance parsing:
64 | // • Custom list converters so that lists inside table cells get rendered inline.
65 | // • An inline converter so that tags like and