├── Dockerfile
├── LICENSE
├── README.md
├── chromeDriver
├── chromedriver_linux64
│ ├── LICENSE.chromedriver
│ └── chromedriver
├── chromedriver_mac64
│ ├── LICENSE.chromedriver
│ └── chromedriver
├── chromedriver_mac_arm64
│ ├── LICENSE.chromedriver
│ └── chromedriver
└── chromedriver_win32
│ ├── LICENSE.chromedriver
│ └── chromedriver.exe
├── docker-compose.yml
├── pom.xml
├── readme_images
├── Snipaste_2020-10-19_15-16-27.png
├── Snipaste_2020-10-19_15-16-40.png
├── Snipaste_2020-10-19_15-16-52.png
├── favicon.ico
├── img.png
├── img_1.png
├── img_2.png
├── img_3.png
├── img_4.png
├── img_5.png
├── my.png
└── start.md
├── src
├── main
│ ├── java
│ │ └── com
│ │ │ ├── liangtengyu
│ │ │ └── markdown
│ │ │ │ ├── MarkDownApplication.java
│ │ │ │ ├── config
│ │ │ │ ├── AppBean.java
│ │ │ │ ├── ApplicationConfig.java
│ │ │ │ ├── StartupConfig.java
│ │ │ │ └── ThreadPoolConfig.java
│ │ │ │ ├── controller
│ │ │ │ ├── PageController.java
│ │ │ │ ├── RequestController.java
│ │ │ │ └── SettingController.java
│ │ │ │ ├── dao
│ │ │ │ ├── MDDao.java
│ │ │ │ ├── PICDao.java
│ │ │ │ ├── SETTINGDao.java
│ │ │ │ └── UserTemplateDao.java
│ │ │ │ ├── entity
│ │ │ │ ├── MD.java
│ │ │ │ ├── MarkDown.java
│ │ │ │ ├── PIC.java
│ │ │ │ ├── SETTING.java
│ │ │ │ └── UserTemplate.java
│ │ │ │ ├── service
│ │ │ │ ├── FilelistService.java
│ │ │ │ ├── HandleService.java
│ │ │ │ ├── Impl
│ │ │ │ │ ├── CSDNHandleService.java
│ │ │ │ │ ├── CsdnBlogHandleService.java
│ │ │ │ │ ├── FilelistServiceImpl.java
│ │ │ │ │ ├── JianshuHandleService.java
│ │ │ │ │ ├── JuejinHandleService.java
│ │ │ │ │ ├── MarkDownService.java
│ │ │ │ │ ├── SaveFileServiceImpl.java
│ │ │ │ │ ├── SegmentFaultHandleService.java
│ │ │ │ │ ├── SettingServiceImpl.java
│ │ │ │ │ ├── V2exHandleService.java
│ │ │ │ │ ├── WeiXinHandleService.java
│ │ │ │ │ ├── YuqueHandleService.java
│ │ │ │ │ └── ZhihuHandleService.java
│ │ │ │ ├── ResolveService.java
│ │ │ │ ├── SaveFileService.java
│ │ │ │ └── SettingService.java
│ │ │ │ └── utils
│ │ │ │ ├── ImageUtil.java
│ │ │ │ └── MarkDownUtil.java
│ │ │ └── overzealous
│ │ │ └── remark
│ │ │ ├── Remark.java
│ │ │ └── convert
│ │ │ ├── CmLine.java
│ │ │ ├── Codeblock.java
│ │ │ ├── Header.java
│ │ │ ├── Image.java
│ │ │ └── InlineStyle.java
│ └── resources
│ │ ├── application.yml
│ │ ├── banner.txt
│ │ ├── data.sql
│ │ ├── schema.sql
│ │ ├── static
│ │ ├── css
│ │ │ ├── about.ae075234.css
│ │ │ ├── app.400b5e86.css
│ │ │ └── chunk-vendors.07e3bc5b.css
│ │ ├── favicon.ico
│ │ ├── fonts
│ │ │ ├── fontello.068ca2b3.ttf
│ │ │ ├── fontello.8d4a4e6f.woff2
│ │ │ ├── fontello.a782baa8.woff
│ │ │ └── fontello.e73a0647.eot
│ │ ├── img
│ │ │ ├── 1614755729311.82e3994d.jpg
│ │ │ └── fontello.9354499c.svg
│ │ ├── index.html
│ │ └── js
│ │ │ ├── about.37e01380.js
│ │ │ ├── app.c2a083e4.js
│ │ │ └── chunk-vendors.531895a6.js
│ │ └── templates
│ │ └── index.html
└── test
│ └── java
│ └── com
│ └── liangtengyu
│ └── markdown
│ └── MarkDownApplicationTests.java
├── vue_project
├── .gitignore
├── README.md
├── babel.config.js
├── package.json
├── src
│ ├── App.vue
│ ├── assets
│ │ ├── 1614755729311.jpg
│ │ └── logo.png
│ ├── components
│ │ └── WebInfo.vue
│ ├── main.js
│ ├── router
│ │ └── index.js
│ └── views
│ │ ├── About.vue
│ │ ├── Filelist.vue
│ │ ├── Home.vue
│ │ ├── config.vue
│ │ ├── manage.vue
│ │ └── upload.vue
├── vue.config.js
└── yarn.lock
└── windows
└── tomarkdown.rar
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM openjdk:8
2 |
3 | COPY /chromeDriver /chromeDriver
4 | EXPOSE 9999
5 | COPY ./target/markdown_resolve.jar /markdown_resolve.jar
6 | ENTRYPOINT ["java", "-jar", "/markdown_resolve.jar"]
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
It is recommended that you keep and reuse a single instance of this class for better performance. This class
36 | * is thread-safe, but can only process a single document concurrently.
37 | *
38 | *
Usage:
39 | *
40 | *
Basic usage involves instantiating this class with a specific set of options, and calling one of the
41 | * {@code convert*} methods on some form of input.
42 | *
43 | *
Examples:
44 | *
45 | *
46 | * // Create a generic remark that converts to pure-Markdown spec.
47 | * Remark remark = new Remark();
48 | * String cleanedUp = remark.convertFragment(inputString);
49 | *
50 | * // Create a remark that converts to pegdown with all extensions enabled.
51 | * Remark pegdownAll = new Remark(Options.pegdownAllExtensions());
52 | * cleanedUp = pegdownAll.convert(new URL("http://www.example.com"), 15000);
53 | *
54 | * // stream the conversion
55 | * pegdownAll.withStream(System.out).convert(new URL("http://www.overzealous.com"), 15000);
56 | *
57 | *
58 | *
59 | * @author Phil DeJarnett
60 | */
61 | public class Remark {
62 | private final Cleaner cleaner;
63 | @SuppressWarnings({"FieldCanBeLocal", "UnusedDeclaration"})
64 | private final Options options;
65 | private final DocumentConverter converter;
66 | private final ReentrantLock converterLock = new ReentrantLock();
67 | private boolean cleanedHtmlEchoed = false;
68 |
69 | /**
70 | * Creates a default, pure Markdown-compatible Remark instance.
71 | */
72 | public Remark() {
73 | this(Options.markdown());
74 | }
75 |
76 | /**
77 | * Creates a Remark instance with the specified options.
78 | *
79 | * @param options Specified options to use on this instance. See the docs for the Options class for common options sets.
80 | */
81 | public Remark(Options options) {
82 | this.options = options.getCopy();
83 | Whitelist whitelist = Whitelist.basicWithImages()
84 | .addTags("div",
85 | "h1", "h2", "h3", "h4", "h5", "h6",
86 | "table", "tbody", "td", "tfoot", "th", "thead", "tr",
87 | "hr",
88 | "span", "font")
89 | .addAttributes("th", "colspan", "align", "style")
90 | .addAttributes("td", "colspan", "align", "style")
91 | .addAttributes(":all", "title", "style");
92 | if(options.preserveRelativeLinks) {
93 | whitelist.preserveRelativeLinks(true);
94 | }
95 | if(options.abbreviations) {
96 | whitelist.addTags("abbr", "acronym");
97 | }
98 | if(options.headerIds) {
99 | for(int i=1; i<=6; i++) {
100 | whitelist.addAttributes("h"+i, "id");
101 | }
102 | }
103 | for(final IgnoredHtmlElement el : options.getIgnoredHtmlElements()) {
104 | whitelist.addTags(el.getTagName());
105 | if(!el.getAttributes().isEmpty()) {
106 | whitelist.addAttributes(el.getTagName(), el.getAttributes().toArray(new String[el.getAttributes().size()]));
107 | }
108 | }
109 | cleaner = new Cleaner(whitelist);
110 |
111 | if(options.getTables().isLeftAsHtml()) {
112 | // we need to allow the table nodes to be ignored
113 | // since they are automatically ignored recursively, this is the only node we worry about.
114 | options.getIgnoredHtmlElements().add(IgnoredHtmlElement.create("table"));
115 | }
116 |
117 | converter = new DocumentConverter(options);
118 | }
119 |
120 | /**
121 | * Provides access to the DocumentConverter for customization.
122 | *
123 | * @return the configured DocumentConverter.
124 | */
125 | @SuppressWarnings({"UnusedDeclaration"})
126 | public DocumentConverter getConverter() {
127 | return converter;
128 | }
129 |
130 | /**
131 | * Returns true if the cleaned HTML document is echoed to {@code System.out}.
132 | * @return true if the cleaned HTML document is echoed
133 | */
134 | @SuppressWarnings({"UnusedDeclaration"})
135 | public boolean isCleanedHtmlEchoed() {
136 | return cleanedHtmlEchoed;
137 | }
138 |
139 | /**
140 | * To see the cleaned and processed HTML document, set this to true. It will
141 | * be rendered to {@code System.out} for debugging purposes.
142 | * @param cleanedHtmlEchoed true to echo out the cleaned HTML document
143 | */
144 | public void setCleanedHtmlEchoed(boolean cleanedHtmlEchoed) {
145 | this.cleanedHtmlEchoed = cleanedHtmlEchoed;
146 | }
147 |
148 | /**
149 | * This class is used to handle conversions that convert directly to streams.
150 | */
151 | private final class StreamRemark extends Remark {
152 | private final Remark remark;
153 | private final Writer writer;
154 | private final OutputStream os;
155 |
156 | private StreamRemark(Remark remark, Writer writer) {
157 | this.remark = remark;
158 | this.writer = writer;
159 | this.os = null;
160 | }
161 | private StreamRemark(Remark remark, OutputStream out) {
162 | this.remark = remark;
163 | this.writer = null;
164 | this.os = out;
165 | }
166 |
167 | @Override
168 | public Remark withWriter(Writer writer) {
169 | return remark.withWriter(writer);
170 | }
171 |
172 | @Override
173 | public Remark withOutputStream(OutputStream os) {
174 | return remark.withOutputStream(os);
175 | }
176 |
177 | @Override
178 | public String convert(Document doc) {
179 | return remark.processConvert(doc, writer, os);
180 | }
181 | }
182 |
183 | /**
184 | * Use this method in a chain to handle streaming the output to a Writer.
185 | * The returned class can be saved for repeated writing to the same streams.
186 | *
187 | *
Note: The convert methods on the returned class will always return {@code null}.
188 | *
189 | *
Note: It is up to the calling class to handle closing the writer!
190 | *
191 | *
Example:
192 | *
193 | *
{@code new Remark(options).withWriter(myWiter).convert(htmlText);}
194 | *
195 | * @param writer Writer to receive the converted output
196 | * @return A Remark that writes to streams.
197 | */
198 | @SuppressWarnings({"WeakerAccess"})
199 | public synchronized Remark withWriter(Writer writer) {
200 | if(writer == null) {
201 | throw new NullPointerException("Writer cannot be null.");
202 | }
203 | return new StreamRemark(this, writer);
204 | }
205 |
206 | /**
207 | * Use this method in a chain to handle streaming the output to an OutputStream.
208 | * The returned class can be saved for repeated writing to the same streams.
209 | *
210 | *
Note: The convert methods on the returned class will always return {@code null}.
211 | *
212 | *
Note: It is up to the calling class to handle closing the stream!
213 | *
214 | *
Example:
215 | *
216 | *
{@code new Remark(options).withOutputStream(myOut).convert(htmlText);}
217 | *
218 | * @param os OutputStream to receive the converted output
219 | * @return A Remark that writes to streams.
220 | */
221 | @SuppressWarnings({"WeakerAccess"})
222 | public synchronized Remark withOutputStream(OutputStream os) {
223 | if(os == null) {
224 | throw new NullPointerException("OutputStream cannot be null.");
225 | }
226 | return new StreamRemark(this, os);
227 | }
228 |
229 | /**
230 | * Converts an HTML document retrieved from a URL to Markdown.
231 | * @param url URL to connect to.
232 | * @param timeoutMillis Maximum time to wait before giving up on the connection.
233 | * @return Markdown text.
234 | * @throws IOException If an error occurs while retrieving the document.
235 | * @see org.jsoup.Jsoup#parse(URL, int)
236 | */
237 | public String convert(URL url, int timeoutMillis) throws IOException {
238 | Document doc = Jsoup.parse(url, timeoutMillis);
239 | return convert(doc);
240 | }
241 |
242 |
243 | /**
244 | * Converts an HTML file to Markdown.
245 | * @param file The file to load.
246 | * @return Markdown text.
247 | * @throws IOException If an error occurs while loading the file.
248 | * @see org.jsoup.Jsoup#parse(File, String, String)
249 | */
250 | public String convert(File file) throws IOException {
251 | return convert(file, null);
252 | }
253 |
254 |
255 | /**
256 | * Converts an HTML file to Markdown.
257 | * @param file The file to load.
258 | * @param charset The charset of the file (if not specified and not UTF-8). Set to {@code null} to determine from {@code http-equiv} meta tag, if present, or fall back to {@code UTF-8} (which is often safe to do).
259 | * @return Markdown text.
260 | * @throws IOException If an error occurs while loading the file.
261 | * @see org.jsoup.Jsoup#parse(File, String, String)
262 | */
263 | @SuppressWarnings({"WeakerAccess", "SameParameterValue"})
264 | public String convert(File file, String charset) throws IOException {
265 | return convert(file, charset, "");
266 | }
267 |
268 |
269 | /**
270 | * Converts an HTML file to Markdown.
271 | * @param file The file to load.
272 | * @param charset The charset of the file (if not specified and not UTF-8). Set to {@code null} to determine from {@code http-equiv} meta tag, if present, or fall back to {@code UTF-8} (which is often safe to do).
273 | * @param baseUri The base URI for resolving relative links.
274 | * @return Markdown text.
275 | * @throws IOException If an error occurs while loading the file.
276 | * @see org.jsoup.Jsoup#parse(File, String, String)
277 | */
278 | public String convert(File file, String charset, String baseUri) throws IOException {
279 | Document doc = Jsoup.parse(file, charset, baseUri);
280 | return convert(doc);
281 | }
282 |
283 |
284 | /**
285 | * Converts HTML in memory to Markdown.
286 | * @param html The string to processConvert from HTML
287 | * @return Markdown text.
288 | * @see org.jsoup.Jsoup#parse(String, String)
289 | */
290 | public String convert(String html) {
291 | return convert(html, "");
292 | }
293 |
294 |
295 | /**
296 | * Converts HTML in memory to Markdown.
297 | * @param html The string to processConvert from HTML
298 | * @param baseUri The base URI for resolving relative links.
299 | * @return Markdown text.
300 | * @see org.jsoup.Jsoup#parse(String, String)
301 | */
302 | @SuppressWarnings({"WeakerAccess", "SameParameterValue"})
303 | public String convert(String html, String baseUri) {
304 | Document doc = Jsoup.parse(html, baseUri);
305 | return convert(doc);
306 | }
307 |
308 |
309 | /**
310 | * Converts an HTML body fragment to Markdown.
311 | * @param body The fragment string to processConvert from HTML
312 | * @return Markdown text.
313 | * @see org.jsoup.Jsoup#parseBodyFragment(String, String)
314 | */
315 | @SuppressWarnings({"UnusedDeclaration"})
316 | public String convertFragment(String body) {
317 | return convertFragment(body, "");
318 | }
319 |
320 |
321 | /**
322 | * Converts an HTML body fragment to Markdown.
323 | * @param body The fragment string to processConvert from HTML
324 | * @param baseUri The base URI for resolving relative links.
325 | * @return Markdown text.
326 | * @see org.jsoup.Jsoup#parseBodyFragment(String, String)
327 | */
328 | public String convertFragment(String body, String baseUri) {
329 | Document doc = Jsoup.parseBodyFragment(body, baseUri);
330 | return convert(doc);
331 | }
332 |
333 | /**
334 | * Converts an already-loaded JSoup Document to Markdown.
335 | *
336 | * @param doc Document to be processed
337 | * @return Markdown text.
338 | */
339 | @SuppressWarnings({"WeakerAccess"})
340 | public String convert(Document doc) {
341 | // Note: all convert methods should end up going through this method!
342 | return processConvert(doc, null, null);
343 | }
344 |
345 | /**
346 | * Handles the actual conversion
347 | * @param doc document to convert
348 | * @param writer Optional Writer for output
349 | * @param os Optional OutputStream for output
350 | * @return String result if not using an output stream, else null
351 | */
352 | private String processConvert(Document doc, Writer writer, OutputStream os) {
353 | String cleanString = Jsoup.clean(doc.html(), "https://www.baidu.com", Whitelist.relaxed().preserveRelativeLinks(true));
354 | Document parse = Jsoup.parse(cleanString);
355 | doc = parse;
356 | if(cleanedHtmlEchoed) {
357 | System.out.println("Cleaned and processed HTML document:");
358 | System.out.println(doc.toString());
359 | System.out.println();
360 | }
361 | String result = null;
362 | converterLock.lock();
363 | try {
364 | if(writer != null) {
365 | converter.convert(doc, writer);
366 | } else if(os != null) {
367 | converter.convert(doc, os);
368 | } else {
369 | result = converter.convert(doc);
370 | }
371 | } finally {
372 | converterLock.unlock();
373 | }
374 | return result;
375 | }
376 | }
377 |
--------------------------------------------------------------------------------
/src/main/java/com/overzealous/remark/convert/CmLine.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 OverZealous Creations, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.overzealous.remark.convert;
18 |
19 | import com.overzealous.remark.Options;
20 | import com.overzealous.remark.util.BlockWriter;
21 | import com.overzealous.remark.util.StringUtils;
22 | import org.jsoup.nodes.Element;
23 |
24 | /**
25 | * Handles preformatted sections (pre), renders them as code blocks.
26 | *
27 | * @author Phil DeJarnett
28 | */
29 | public class CmLine extends AbstractNodeHandler {
30 |
31 | /**
32 | * Converts a pre-formatted block of code.
33 | * Depending on the options, this may render as a block with four spaces added to the beginning,
34 | * or as a fenced code block.
35 | *
36 | * @param parent The previous node walker, in case we just want to remove an element.
37 | * @param node Node to handle
38 | * @param converter Parent converter for this object.
39 | */
40 | @Override
41 | public void handleNode(NodeHandler parent, Element node, DocumentConverter converter) {
42 | BlockWriter out;
43 | Options.FencedCodeBlocks fenced = converter.options.getFencedCodeBlocks();
44 | if(fenced.isEnabled()) {
45 | String fence = StringUtils.multiply(fenced.getSeparatorCharacter(),
46 | converter.options.fencedCodeBlocksWidth);
47 | out = converter.output;
48 | converter.output.startBlock();
49 | out.println(fence);
50 | out.write(converter.cleaner.cleanCode(node));
51 | out.println();
52 | out.print(fence);
53 | converter.output.endBlock();
54 | } else {
55 | converter.output.startBlock();
56 | out = new BlockWriter(converter.output);
57 | out.print("\r\n");
58 | out.write(converter.cleaner.cleanCode(node));
59 | out.print("\r\n");
60 | converter.output.endBlock();
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/main/java/com/overzealous/remark/convert/Codeblock.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 OverZealous Creations, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.overzealous.remark.convert;
18 |
19 | import com.overzealous.remark.Options;
20 | import com.overzealous.remark.util.BlockWriter;
21 | import com.overzealous.remark.util.StringUtils;
22 | import org.jsoup.nodes.Element;
23 |
24 | /**
25 | * Handles preformatted sections (pre), renders them as code blocks.
26 | *
27 | * @author Phil DeJarnett
28 | */
29 | public class Codeblock extends AbstractNodeHandler {
30 |
31 |     /**
32 |      * Writes a preformatted node to the converter's output as one code block.
33 |      * With fenced code blocks enabled, the fence is built from the configured separator character and
34 |      * the configured width; otherwise the code is wrapped in a fixed {@code ```java} / {@code ```} pair.
35 |      *
36 |      * @param parent The previous node walker, in case we just want to remove an element.
37 |      * @param node Node to handle
38 |      * @param converter Parent converter for this object.
39 |      */
40 |     @Override
41 |     public void handleNode(NodeHandler parent, Element node, DocumentConverter converter) {
42 |         final Options.FencedCodeBlocks fenceMode = converter.options.getFencedCodeBlocks();
43 |         if(!fenceMode.isEnabled()) {
44 |             // Fencing disabled: emit the hard-coded java fence through a writer layered on the output.
45 |             converter.output.startBlock();
46 |             final BlockWriter nested = new BlockWriter(converter.output);
47 |             nested.print("```java");
48 |             nested.print("\r\n");
49 |             nested.write(converter.cleaner.cleanCode(node));
50 |             nested.print("\r\n");
51 |             nested.print("```");
52 |             converter.output.endBlock();
53 |             return;
54 |         }
55 |         // Fencing enabled: opening fence line, the cleaned code, a line break, then the closing fence.
56 |         final String delimiter = StringUtils.multiply(fenceMode.getSeparatorCharacter(), converter.options.fencedCodeBlocksWidth);
57 |         final BlockWriter target = converter.output;
58 |         target.startBlock();
59 |         target.println(delimiter);
60 |         target.write(converter.cleaner.cleanCode(node));
61 |         target.println();
62 |         target.print(delimiter);
63 |         target.endBlock();
64 |     }
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/java/com/overzealous/remark/convert/Header.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 OverZealous Creations, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.overzealous.remark.convert;
18 |
19 | import com.overzealous.remark.util.BlockWriter;
20 | import com.overzealous.remark.util.StringUtils;
21 | import org.jsoup.nodes.Element;
22 |
23 | /**
24 | * Handles header nodes (h1 through h6)
25 | *
26 | * @author Phil DeJarnett
27 | */
28 | public class Header extends AbstractNodeHandler {
29 |
30 |     /**
31 |      * Renders a header node (h1..h6) as a single-line heading.
32 |      * The output is the {@code #} character multiplied by the header level, a space, the node's
33 |      * inline content with newlines replaced by spaces, and a trailing space. If
34 |      * {@code options.headerIds} is set and the node has an {@code id} attribute, {@code  {#id}} follows.
35 |      *
36 |      * @param parent The previous node walker, in case we just want to remove an element.
37 |      * @param node Node to handle
38 |      * @param converter Parent converter for this object.
39 |      */
40 |     @Override
41 |     public void handleNode(NodeHandler parent, Element node, DocumentConverter converter) {
42 |         // The level is the single digit that follows the 'h' in the tag name.
43 |         int depth = Integer.parseInt(node.tagName().substring(1, 2));
44 |         BlockWriter out = converter.output;
45 |         out.startBlock();
46 |         StringUtils.multiply(out, '#', depth);
47 |         out.print(' ');
48 |         out.print(converter.getInlineContent(this, node).replace("\n", " "));
49 |         out.print(' ');
50 |         if(converter.options.headerIds && node.hasAttr("id")) {
51 |             out.printf(" {#%s}", node.attr("id"));
52 |         }
53 |         out.endBlock();
54 |     }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/com/overzealous/remark/convert/Image.java:
--------------------------------------------------------------------------------
1 | package com.overzealous.remark.convert;
2 |
3 |
4 | import com.overzealous.remark.util.BlockWriter;
5 | import org.apache.commons.lang3.StringUtils;
6 | import org.jsoup.nodes.Element;
7 |
8 | /**
9 | * Handles img tags.
10 | * @author Phil DeJarnett
11 | */
12 | public class Image extends AbstractNodeHandler {
13 |
14 |     /**
15 |      * Writes an img node as an inline Markdown image, {@code ![alt](src)}.
16 |      * The {@code src} attribute is passed through the converter's URL cleaner first; if the result
17 |      * is blank, nothing is written.
18 |      *
19 |      * @param parent The previous node walker (not used by this handler).
20 |      * @param node The img element to render.
21 |      * @param converter Parent converter for this object.
22 |      */
23 |     @Override
24 |     public void handleNode(NodeHandler parent, Element node, DocumentConverter converter) {
25 |         String url = converter.cleaner.cleanUrl(node.attr("src"));
26 |         if (StringUtils.isNotBlank(url)) {
27 |             String alt = node.attr("alt");
28 |             // The format string must carry both placeholders; an empty format prints nothing.
29 |             converter.output.printf("![%s](%s)", alt, url);
30 |         }
31 |     }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/overzealous/remark/convert/InlineStyle.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 OverZealous Creations, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.overzealous.remark.convert;
18 |
19 | import com.overzealous.remark.Options;
20 | import org.jsoup.nodes.Element;
21 | import org.jsoup.nodes.Node;
22 | import org.jsoup.nodes.TextNode;
23 |
24 | import java.util.regex.Matcher;
25 | import java.util.regex.Pattern;
26 |
27 | /**
28 | * Handles various inline styling (italics and bold), such as em, i, strong, b, span, and font tags.
29 | * @author Phil DeJarnett
30 | */
31 | public class InlineStyle extends AbstractNodeHandler {
32 |
33 | private static final char ITALICS_WRAPPER = '*';
34 | private static final String BOLD_WRAPPER = "**";
35 |
36 | private static final Pattern ITALICS_PATTERN = Pattern.compile("font-style:\\s*italic", Pattern.CASE_INSENSITIVE);
37 | private static final Pattern BOLD_PATTERN = Pattern.compile("font-weight:\\s*bold", Pattern.CASE_INSENSITIVE);
38 |
39 | private static final Pattern INWORD_CHARACTER = Pattern.compile("\\w");
40 |
41 | private static final Pattern SPACE_CONTENT_SPACE = Pattern.compile("^(\\s*+)(.*?)(\\s*)$", Pattern.DOTALL);
42 |
43 | private int italicDepth = 0;
44 | private int boldDepth = 0;
45 |
46 | /**
47 |  * Renders inline styling (bold, italics) for the given tag. It handles implicit styling ({@code em}, {@code strong}) as
48 |  * well as explicit styling via the {@code style} attribute.
49 |  *
50 |  * <p>This object keeps track of the depth of the styling, so that a styled tag nested inside a tag with the
51 |  * same styling is not styled a second time. The depth counters are restored even if walking the children throws.
52 |  *
53 |  * @param parent The previous node walker, in case we just want to remove an element.
54 |  * @param node Node to handle
55 |  * @param converter Parent converter for this object.
56 |  */
57 | @Override
58 | public void handleNode(NodeHandler parent, Element node, DocumentConverter converter) {
59 |     if(checkInnerBlock(node)) {
60 |         // An inline node is not valid around block nodes, so ignore this tag and recurse as if it were not here.
61 |         converter.walkNodes(parent, node);
62 |     } else {
63 |         Rules rules = checkInword(node, converter);
64 |         if(rules.emphasisPreserved) {
65 |             checkTag(node, rules);
66 |             if(rules.bold || rules.italics) {
67 |                 handleStyled(parent, node, converter, rules);
68 |             } else {
69 |                 converter.walkNodes(this, node, converter.inlineNodes);
70 |             }
71 |         } else { // emphasis has been disabled for this section
72 |             // mark as if emphasis was already processed
73 |             italicDepth++;
74 |             boldDepth++;
75 |             try {
76 |                 converter.walkNodes(this, node, converter.inlineNodes);
77 |             } finally {
78 |                 // undo the marks even on failure so this handler's counters stay balanced
79 |                 italicDepth--;
80 |                 boldDepth--;
81 |             }
82 |         }
83 |     }
84 | }
85 |
86 | @Override
87 | public void handleTextNode(TextNode node, DocumentConverter converter) {
88 | // Override to provide special handling for ignoring
89 | // leading or trailing all-space nodes.
90 | if((node.previousSibling() != null && node.nextSibling() != null) ||
91 | node.text().trim().length() != 0) {
92 | super.handleTextNode(node, converter);
93 | }
94 | }
95 |
96 | /**
97 |  * Minor class to hold onto the styling rules for one node. Static: it never touches the enclosing handler.
98 |  */
99 | private static class Rules {
100 |     boolean emphasisPreserved = true; // false: handleNode walks the node with styling marked as already applied
101 |     boolean addSpacing = false; // not read in the code visible here; presumably set by checkInword (confirm)
102 |     boolean italics = false; // node is to be treated as italic
103 |     boolean bold = false; // node is to be treated as bold
104 | }
105 |
106 | /**
107 |  * Handles dealing with a styled node (one that is bold and/or italic).
108 |  *
109 |  * <p>The node's inline content is fetched with the matching depth counters raised, then written as
110 |  * leading whitespace, content and trailing whitespace. No emphasis markers are written around the content here.
111 |  * If the fetched content is empty, nothing is written.
112 |  *
113 |  * @param parent The previous node walker, in case we just want to remove an element.
114 |  * @param node Node to handle
115 |  * @param converter Parent converter for this object.
116 |  * @param rules The styling rules that are active
117 |  */
118 | private void handleStyled(NodeHandler parent, Element node, DocumentConverter converter, Rules rules) {
119 |     // prevent double styling while the children are rendered
120 |     if(rules.bold) { boldDepth++; }
121 |     if(rules.italics) { italicDepth++; }
122 |     String content;
123 |     try {
124 |         content = converter.getInlineContent(this, node, true);
125 |     } finally {
126 |         // always undo the increments, so a failure while rendering cannot leave the counters raised
127 |         if(rules.bold) { boldDepth--; }
128 |         if(rules.italics) { italicDepth--; }
129 |     }
130 |
131 |     // only proceed if we have content
132 |     if(content.length() > 0) {
133 |         Matcher parts = SPACE_CONTENT_SPACE.matcher(content);
134 |         if(parts.find()) {
135 |             // write any leading space
136 |             converter.output.write(parts.group(1));
137 |
138 |             // skip the content write if nothing is left besides whitespace
139 |             if(parts.group(2).length() > 0) {
140 |                 converter.output.write(parts.group(2));
141 |             }
142 |
143 |             // write any trailing space
144 |             converter.output.write(parts.group(3));
145 |         } // else, something weird happened, like (1 == 0)
146 |     }
147 | }
148 |
149 | /**
150 |  * Reports whether any element nested inside the given node is a block-level element.
151 |  * Children are examined in order, each followed by its own children, and the search stops at the first block.
152 |  *
153 |  * @param node Current node
154 |  * @return True if there is a block somewhere inside this node (which would be invalid HTML)
155 |  */
156 | private boolean checkInnerBlock(Element node) {
157 |     for(final Element inner : node.children()) {
158 |         // a direct child that is a block, or one that contains a block, settles it
159 |         if(inner.isBlock() || checkInnerBlock(inner)) {
160 |             return true;
161 |         }
162 |     }
163 |     return false;
164 | }
165 |
166 | /**
167 | * Handles the situation where InWordEmphasis needs to be manipulated.
168 | *
169 | * <p>This isn't a terribly intelligent check - it merely looks for the
170 | * situation where a styled node is immediately followed by a
171 | * text node, and that text node starts with a word character.