├── .gitattributes
├── .gitignore
├── composer.json
├── composer.lock
├── convert.php
├── readme.md
└── vendor
├── autoload.php
├── bin
└── html-to-markdown
├── composer
├── ClassLoader.php
├── LICENSE
├── autoload_classmap.php
├── autoload_namespaces.php
├── autoload_psr4.php
├── autoload_real.php
├── autoload_static.php
└── installed.json
└── league
└── html-to-markdown
├── .github
├── FUNDING.yml
└── stale.yml
├── CHANGELOG.md
├── CONDUCT.md
├── LICENSE
├── README.md
├── bin
└── html-to-markdown
├── composer.json
└── src
├── Configuration.php
├── ConfigurationAwareInterface.php
├── Converter
├── BlockquoteConverter.php
├── CodeConverter.php
├── CommentConverter.php
├── ConverterInterface.php
├── DefaultConverter.php
├── DivConverter.php
├── EmphasisConverter.php
├── HardBreakConverter.php
├── HeaderConverter.php
├── HorizontalRuleConverter.php
├── ImageConverter.php
├── LinkConverter.php
├── ListBlockConverter.php
├── ListItemConverter.php
├── ParagraphConverter.php
├── PreformattedConverter.php
└── TextConverter.php
├── Element.php
├── ElementInterface.php
├── Environment.php
├── HtmlConverter.php
└── HtmlConverterInterface.php
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "require": {
3 | "league/html-to-markdown": "^4.9"
4 | }
5 | }
6 |
--------------------------------------------------------------------------------
/composer.lock:
--------------------------------------------------------------------------------
1 | {
2 | "_readme": [
3 | "This file locks the dependencies of your project to a known state",
4 | "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
5 | "This file is @generated automatically"
6 | ],
7 | "content-hash": "c422c69f74e9129c382716c129add662",
8 | "packages": [
9 | {
10 | "name": "league/html-to-markdown",
11 | "version": "4.10.0",
12 | "source": {
13 | "type": "git",
14 | "url": "https://github.com/thephpleague/html-to-markdown.git",
15 | "reference": "0868ae7a552e809e5cd8f93ba022071640408e88"
16 | },
17 | "dist": {
18 | "type": "zip",
19 | "url": "https://api.github.com/repos/thephpleague/html-to-markdown/zipball/0868ae7a552e809e5cd8f93ba022071640408e88",
20 | "reference": "0868ae7a552e809e5cd8f93ba022071640408e88",
21 | "shasum": ""
22 | },
23 | "require": {
24 | "ext-dom": "*",
25 | "ext-xml": "*",
26 | "php": ">=5.3.3"
27 | },
28 | "require-dev": {
29 | "mikehaertl/php-shellcommand": "~1.1.0",
30 | "phpunit/phpunit": "^4.8|^5.7",
31 | "scrutinizer/ocular": "~1.1"
32 | },
33 | "bin": [
34 | "bin/html-to-markdown"
35 | ],
36 | "type": "library",
37 | "extra": {
38 | "branch-alias": {
39 | "dev-master": "4.10-dev"
40 | }
41 | },
42 | "autoload": {
43 | "psr-4": {
44 | "League\\HTMLToMarkdown\\": "src/"
45 | }
46 | },
47 | "notification-url": "https://packagist.org/downloads/",
48 | "license": [
49 | "MIT"
50 | ],
51 | "authors": [
52 | {
53 | "name": "Colin O'Dell",
54 | "email": "colinodell@gmail.com",
55 | "homepage": "https://www.colinodell.com",
56 | "role": "Lead Developer"
57 | },
58 | {
59 | "name": "Nick Cernis",
60 | "email": "nick@cern.is",
61 | "homepage": "http://modernnerd.net",
62 | "role": "Original Author"
63 | }
64 | ],
65 | "description": "An HTML-to-markdown conversion helper for PHP",
66 | "homepage": "https://github.com/thephpleague/html-to-markdown",
67 | "keywords": [
68 | "html",
69 | "markdown"
70 | ],
71 | "funding": [
72 | {
73 | "url": "https://www.colinodell.com/sponsor",
74 | "type": "custom"
75 | },
76 | {
77 | "url": "https://www.paypal.me/colinpodell/10.00",
78 | "type": "custom"
79 | },
80 | {
81 | "url": "https://github.com/colinodell",
82 | "type": "github"
83 | },
84 | {
85 | "url": "https://www.patreon.com/colinodell",
86 | "type": "patreon"
87 | }
88 | ],
89 | "time": "2020-07-01T00:34:03+00:00"
90 | }
91 | ],
92 | "packages-dev": [],
93 | "aliases": [],
94 | "minimum-stability": "stable",
95 | "stability-flags": [],
96 | "prefer-stable": false,
97 | "prefer-lowest": false,
98 | "platform": [],
99 | "platform-dev": [],
100 | "plugin-api-version": "1.1.0"
101 | }
102 |
--------------------------------------------------------------------------------
/convert.php:
--------------------------------------------------------------------------------
1 | "http://wordpress.org/export/1.2/excerpt/",
22 | 'content' => "http://purl.org/rss/1.0/modules/content/",
23 | 'wfw' => "http://wellformedweb.org/CommentAPI/",
24 | 'dc' => "http://purl.org/dc/elements/1.1/",
25 | 'wp' => "http://wordpress.org/export/1.2/"
26 | );
27 |
28 | // Specify the source XML file
29 |
30 | $importfile = 'data.xml';
31 |
32 | // Specify the directory where files will be exported, including a trailing slash
33 |
34 | $exportdir = 'export/';
35 |
36 | // Get the contents of the XML file
37 |
38 | $xml = file_get_contents($importfile);
39 | $xml = new SimpleXmlElement($xml);
40 |
41 | // Grab all the things!
42 |
43 | foreach ($xml->channel->item as $item) {
44 | $article = array();
45 | $article['title'] = $item->title;
46 | $article['link'] = $item->link;
47 | $article['datestamp'] = $item->pubDate;
48 | $article['timestamp'] = strtotime($item->pubDate);
49 | $article['description'] = (string) trim($item->description);
50 | $article['image'] = (string) trim($item->children($ns['wp'])->attachment_url);
51 | if ($article['image']) {
52 | $article['imagedata'] = file_get_contents($article['image']);
53 | }
54 |
55 | // Grab categories and tags for each post
56 |
57 | $tags = array();
58 | $categories = array();
59 | foreach ($item->category as $cat) {
60 | $cattype = $cat['domain'];
61 |
62 | if($cattype == "post_tag") {
63 | array_push($tags,$cat);
64 | }
65 | elseif($cattype == "category") {
66 | array_push($categories,$cat);
67 | }
68 | }
69 |
70 | // Grab data within specific namespaces
71 |
72 | $content = $item->children($ns['content']);
73 | $wfw = $item->children($ns['wfw']);
74 | $wp = $item->children($ns['wp']);
75 |
76 | $article['postid'] = $wp->post_id;
77 | $article['content'] = (string) trim($content->encoded);
78 | $article['content'] = str_replace(PHP_EOL . PHP_EOL, '
', $article['content']);
79 | $article['content'] = mb_convert_encoding($article['content'], 'HTML-ENTITIES', "UTF-8");
80 |
81 | // Convert HTML to Markdown, set optional parameters
82 |
83 | $converter = new HtmlConverter();
84 | $converter->getConfig()->setOption('hard_break', true);
85 | $converter->getConfig()->setOption('strip_tags', true);
86 | $markdown = $converter->convert($article['content']);
87 |
88 | // Strip WordPress caption shortcodes, optional
89 |
90 | $markdown = preg_replace("/\[caption(.*?)\]/", "", $markdown);
91 | $markdown = preg_replace("/\[\/caption\]/", "", $markdown);
92 |
93 | // Prepare various bits of content for the export
94 |
95 | if ($article['title'] != '')
96 | { $tmptitle = str_replace(' ', '-', $article['title']) ; }
97 | else
98 | { $tmptitle = $article['postid'] ; }
99 |
100 | // Convert accented characters to plain ASCII
101 | $tmptitle = iconv('utf-8', 'ascii//TRANSLIT', $tmptitle);
102 | // Remove every character that is not an ASCII letter, a digit or a hyphen
103 | $tmptitle = preg_replace('/[^A-Za-z0-9\-]/', '', $tmptitle);
104 | // Convert to lowercase
105 | $tmptitle = strtolower($tmptitle);
106 | $imagename = basename($article['image']);
107 | $tmpyear = date('Y', strtotime($article['datestamp']));
108 | $tmpdate = date('Y/Ymd', strtotime($article['datestamp']));
109 | $file = $exportdir . $tmpdate . '-' . $tmptitle . '/post.txt';
110 | $fileimage = $exportdir . $tmpdate . '-' . $tmptitle . '/' . $imagename;
111 | $folder = $exportdir . $tmpdate . '-' . $tmptitle;
112 |
113 | // Create the directory for the export
114 |
115 | if (!mkdir($folder, 0777, true)) {
116 | die('Failed to create folders...'. $folder);
117 | }
118 |
119 | // Compile the content for the export
120 |
121 | $strtowrite = "Title: " . $article['title']
122 | . PHP_EOL . PHP_EOL . "----" . PHP_EOL . PHP_EOL
123 | . "Date: " . $article['datestamp']
124 | . PHP_EOL. PHP_EOL . "----" . PHP_EOL . PHP_EOL
125 | . "Post ID: " . $article['postid']
126 | . PHP_EOL. PHP_EOL . "----" . PHP_EOL . PHP_EOL
127 | . "Category: " . implode(', ', $categories)
128 | . PHP_EOL. PHP_EOL . "----" . PHP_EOL . PHP_EOL
129 | . "Tags: " . implode(', ', $tags)
130 | . PHP_EOL. PHP_EOL . "----" . PHP_EOL . PHP_EOL
131 | . ( $imagename ?
132 | "Featured: " . $imagename
133 | . PHP_EOL. PHP_EOL . "----" . PHP_EOL . PHP_EOL : '' )
134 | . "Text: " . PHP_EOL. PHP_EOL . $markdown;
135 |
136 | // Save the post.txt file
137 |
138 | file_put_contents($file, $strtowrite);
139 |
140 | // Save the image file associated with the post, if there is one
141 |
142 | if ($article['image']) {
143 | file_put_contents($fileimage, $article['imagedata']);
144 | }
145 |
146 | // Report what happened
147 |
148 | echo 'File written: ' . $file . ' at ' . date('Y-m-d H:i:s') . PHP_EOL;
149 |
150 | }
151 |
152 | ?>
153 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # WordPress XML to Kirby
2 |
3 | This script converts an XML file that has been exported in [WordPress eXtended RSS (WXR)](https://wordpress.org/support/article/tools-export-screen/) format to a flat file YAML structure for use with [Kirby](https://getkirby.com/).
4 |
5 | This version of the code is based on the original [WPXML to Kirby](https://github.com/greywillfade/wpxml-to-kirby) script by [Sally Lait](https://sallylait.com/) with further modifications made by [Stay Regular Media](https://github.com/stayregular/wpxml-to-kirby).
6 |
7 |
8 | ## Requirements
9 |
10 | + [PHP](https://www.php.net/) 7.2 or later
11 | + [Composer](https://getcomposer.org/)
12 | + [HTML To Markdown for PHP](https://github.com/thephpleague/html-to-markdown)
13 |
14 |
15 | ## Usage
16 |
17 | + Download this repository and extract the contents to a working directory
18 |
19 | ```
20 | /wordpress-xml-to-kirby
21 | ```
22 |
23 | + Install the [Composer](https://getcomposer.org/) dependency manager
24 | + Require the [HTML To Markdown for PHP](https://github.com/thephpleague/html-to-markdown) library
25 |
26 | ```
27 | composer require league/html-to-markdown
28 | ```
29 |
30 | + [Export the content of your WordPress site](https://wordpress.org/documentation/article/tools-export-screen/) to an XML file
31 | + To include featured image metadata in the XML file, [see below](#include-featured-image-metadata)
32 | + Move the XML file to the working directory
33 | + Create an export directory in the working directory with full permissions
34 |
35 | ```
36 | mkdir /wordpress-xml-to-kirby/export
37 | chmod 777 /wordpress-xml-to-kirby/export
38 | ```
39 |
40 | + Edit `convert.php` to add the name of the XML file and the export directory
41 |
42 | ```php
43 | $importfile = 'data.xml';
44 | $exportdir = 'export/';
45 | ```
46 |
47 | + Convert all the things!
48 |
49 | ```
50 | php convert.php
51 | ```
52 |
53 |
54 | ## Include Featured Image Metadata
55 |
56 | To include the featured image metadata in the XML file, the WordPress core `export.php` file must be modified.
57 |
58 | + Open `wp-admin/includes/export.php` in your favourite text editor
59 | + Locate the following code block:
60 |
61 | ```php
62 |
` tags (#174, #178)
23 |
24 | ## [4.8.2] - 2019-08-02
25 | ### Fixed
26 | - Fixed headers not being placed onto a new line in some cases (#172)
27 | - Fixed handling of links containing spaces (#175)
28 |
29 | ### Removed
30 | - Removed support for HHVM
31 |
32 | ## [4.8.1] - 2018-12-24
33 | ### Added
34 | - Added support for PHP 7.3
35 |
36 | ### Fixed
37 | - Fixed paragraphs following tables (#165, #166)
38 | - Fixed incorrect list item escaping (#168, #169)
39 |
40 | ## [4.8.0] - 2018-09-18
41 | ### Added
42 | - Added support for email auto-linking
43 | - Added a new interface (`HtmlConverterInterface`) for the main `HtmlConverter` class
44 | - Added additional test cases (#14)
45 |
46 | ### Changed
47 | - The `italic_style` option now defaults to `'*'` so that in-word emphasis is handled properly (#75)
48 |
49 | ### Fixed
50 | - Fixed several issues of `` and `` tags not converting to blocks or inlines properly (#26, #70, #102, #140, #161, #162)
51 | - Fixed in-word emphasis using underscores as delimiter (#75)
52 | - Fixed character escaping inside of `` elements
53 | - Fixed header edge cases
54 |
55 | ### Deprecated
56 | - The `bold_style` and `italic_style` options have been deprecated (#75)
57 |
58 | ## [4.7.0] - 2018-05-19
59 | ### Added
60 | - Added `setOptions()` function for chainable calling (#149)
61 | - Added new `list_item_style_alternate` option for converting every-other list with a different character (#155)
62 |
63 | ### Fixed
64 | - Fixed insufficient newlines after code blocks (#144, #148)
65 | - Fixed trailing spaces not being preserved in link anchors (#157)
66 | - Fixed list-like lines not being escaped inside of lists items (#159)
67 |
68 | ## [4.6.2]
69 | ### Fixed
70 | - Fixed issue with emphasized spaces (#146)
71 |
72 | ## [4.6.1]
73 | ### Fixed
74 | - Fixed conversion of `` tags (#145)
75 |
76 | ## [4.6.0]
77 | ### Added
78 | - Added support for ordered lists starting at numbers other than 1
79 |
80 | ### Fixed
81 | - Fixed overly-eager escaping of list-like text (#141)
82 |
83 | ## [4.5.0]
84 | ### Added
85 | - Added configuration option for list item style (#135, #136)
86 |
87 | ## [4.4.1]
88 |
89 | ### Fixed
90 | - Fixed autolinking of invalid URLs (#129)
91 |
92 | ## [4.4.0]
93 |
94 | ### Added
95 | - Added `hard_break` configuration option (#112, #115)
96 | - The `HtmlConverter` can now be instantiated with an `Environment` (#118)
97 |
98 | ### Fixed
99 | - Fixed handling of paragraphs in list item elements (#47, #110)
100 | - Fixed phantom spaces when newlines follow `br` elements (#116, #117)
101 | - Fixed link converter not sanitizing inner spaces properly (#119, #120)
102 |
103 | ## [4.3.1]
104 | ### Changed
105 | - Revised the sanitization implementation (#109)
106 |
107 | ### Fixed
108 | - Fixed tag-like content not being escaped (#67, #109)
109 | - Fixed thematic break-like content not being escaped (#65, #109)
110 | - Fixed codefence-like content not being escaped (#64, #109)
111 |
112 | ## [4.3.0]
113 | ### Added
114 | - Added full support for PHP 7.0 and 7.1
115 |
116 | ### Changed
117 | - Changed `` and `` conversions to use backticks instead of indentation (#102)
118 |
119 | ### Fixed
120 | - Fixed issue where specified code language was not preserved (#70, #102)
121 | - Fixed issue where `<code>` tags nested in `<pre>` were not converted properly (#70, #102)
122 | - Fixed header-like content not being escaped (#76, #105)
123 | - Fixed blockquote-like content not being escaped (#77, #103)
124 | - Fixed ordered list-like content not being escaped (#73, #106)
125 | - Fixed unordered list-like content not being escaped (#71, #107)
126 |
127 | ## [4.2.2]
128 | ### Fixed
129 | - Fixed sanitization bug which sometimes removes desired content (#63, #101)
130 |
131 | ## [4.2.1]
132 | ### Fixed
133 | - Fixed path to autoload.php when used as a library (#98)
134 | - Fixed edge case for tags containing only whitespace (#99)
135 |
136 | ### Removed
137 | - Removed double HTML entity decoding, as this is not desirable (#60)
138 |
139 | ## [4.2.0]
140 |
141 | ### Added
142 | - Added the ability to invoke HtmlConverter objects as functions (#85)
143 |
144 | ### Fixed
145 | - Fixed improper handling of nested list items (#19 and #84)
146 | - Fixed preceding or trailing spaces within emphasis tags (#83)
147 |
148 | ## [4.1.1]
149 |
150 | ### Fixed
151 | - Fixed conversion of empty paragraphs (#78)
152 | - Fixed `preg_replace` so it wouldn't break UTF-8 characters (#79)
153 |
154 | ## [4.1.0]
155 |
156 | ### Added
157 | - Added `bin/html-to-markdown` script
158 |
159 | ### Changed
160 | - Changed default italic character to `_` (#58)
161 |
162 | ## [4.0.1]
163 |
164 | ### Fixed
165 | - Added escaping to avoid * and _ in a text being rendered as emphasis (#48)
166 |
167 | ### Removed
168 | - Removed the demo (#51)
169 | - `.styleci.yml` and `CONTRIBUTING.md` are no longer included in distributions (#50)
170 |
171 | ## [4.0.0]
172 |
173 | This release changes the visibility of several methods/properties. #42 and #43 brought to light that some visibilities were
174 | not ideally set, so this release fixes that. Moving forwards this should reduce the chance of introducing BC-breaking changes.
175 |
176 | ### Added
177 | - Added new `HtmlConverter::getEnvironment()` method to expose the `Environment` (#42, #43)
178 |
179 | ### Changed
180 | - Changed `Environment::addConverter()` from `protected` to `public`, enabling custom converters to be added (#42, #43)
181 | - Changed `HtmlConverter::createDOMDocument()` from `protected` to `private`
182 | - Changed `Element::nextCached` from `protected` to `private`
183 | - Made the `Environment` class `final`
184 |
185 | ## [3.1.1]
186 | ### Fixed
187 | - Empty HTML strings now result in empty Markdown documents (#40, #41)
188 |
189 | ## [3.1.0]
190 | ### Added
191 | - Added new `equals` method to `Element` to check for equality
192 |
193 | ### Changes
194 | - Use Linux line endings consistently instead of platform-specific line endings (#36)
195 |
196 | ### Fixed
197 | - Cleaned up code style
198 |
199 | ## [3.0.0]
200 | ### Changed
201 | - Changed namespace to `League\HTMLToMarkdown`
202 | - Changed packagist name to `league/html-to-markdown`
203 | - Re-organized code into several separate classes
204 | - `<a>` tags with identical href and inner text are now rendered using angular bracket syntax (#31)
205 | - `` elements are now treated as block-level elements (#33)
206 |
207 | ## [2.2.2]
208 | ### Added
209 | - Added support for PHP 5.6 and HHVM
210 | - Enabled testing against PHP 7 nightlies
211 | - Added this CHANGELOG.md
212 |
213 | ### Fixed
214 | - Fixed whitespace preservation between inline elements (#9 and #10)
215 |
216 | ## [2.2.1]
217 | ### Fixed
218 | - Preserve placeholder links (#22)
219 |
220 | ## [2.2.0]
221 | ### Added
222 | - Added CircleCI config
223 |
224 | ### Changed
225 | - `` blocks are now treated as code elements
226 |
227 | ### Removed
228 | - Dropped support for PHP 5.2
229 | - Removed incorrect README comment regarding `#text` nodes (#17)
230 |
231 | ## [2.1.2]
232 | ### Added
233 | - Added the ability to blacklist/remove specific node types (#11)
234 |
235 | ### Changed
236 | - Line breaks are now placed after divs instead of before them
237 | - Newlines inside of link texts are now removed
238 | - Updated the minimum PHPUnit version to 4.*
239 |
240 | ## [2.1.1]
241 | ### Added
242 | - Added options to customize emphasis characters
243 |
244 | ## [2.1.0]
245 | ### Added
246 | - Added option to strip HTML tags without Markdown equivalents
247 | - Added `convert()` method for converter reuse
248 | - Added ability to set options after instance construction
249 | - Documented the required PHP extensions (#4)
250 |
251 | ### Changed
252 | - ATX style now used for h1 and h2 tags inside blockquotes
253 |
254 | ### Fixed
255 | - Newlines inside blockquotes are now started with a bracket
256 | - Fixed some incorrect docblocks
257 | - `__toString()` now returns an empty string if input is empty
258 | - Convert head tag if body tag is empty (#7)
259 | - Preserve special characters inside tags without md equivalents (#6)
260 |
261 |
262 | ## [2.0.1]
263 | ### Fixed
264 | - Fixed first line indentation for multi-line code blocks
265 | - Fixed consecutive anchors get separating spaces stripped (#3)
266 |
267 | ## [2.0.0]
268 | ### Added
269 | - Initial release
270 |
271 | [unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.10.0...master
272 | [4.10.0]: https://github.com/thephpleague/html-to-markdown/compare/4.9.1...4.10.0
273 | [4.9.1]: https://github.com/thephpleague/html-to-markdown/compare/4.9.0...4.9.1
274 | [4.9.0]: https://github.com/thephpleague/html-to-markdown/compare/4.8.3...4.9.0
275 | [4.8.3]: https://github.com/thephpleague/html-to-markdown/compare/4.8.2...4.8.3
276 | [4.8.2]: https://github.com/thephpleague/html-to-markdown/compare/4.8.1...4.8.2
277 | [4.8.1]: https://github.com/thephpleague/html-to-markdown/compare/4.8.0...4.8.1
278 | [4.8.0]: https://github.com/thephpleague/html-to-markdown/compare/4.7.0...4.8.0
279 | [4.7.0]: https://github.com/thephpleague/html-to-markdown/compare/4.6.2...4.7.0
280 | [4.6.2]: https://github.com/thephpleague/html-to-markdown/compare/4.6.1...4.6.2
281 | [4.6.1]: https://github.com/thephpleague/html-to-markdown/compare/4.6.0...4.6.1
282 | [4.6.0]: https://github.com/thephpleague/html-to-markdown/compare/4.5.0...4.6.0
283 | [4.5.0]: https://github.com/thephpleague/html-to-markdown/compare/4.4.1...4.5.0
284 | [4.4.1]: https://github.com/thephpleague/html-to-markdown/compare/4.4.0...4.4.1
285 | [4.4.0]: https://github.com/thephpleague/html-to-markdown/compare/4.3.1...4.4.0
286 | [4.3.1]: https://github.com/thephpleague/html-to-markdown/compare/4.3.0...4.3.1
287 | [4.3.0]: https://github.com/thephpleague/html-to-markdown/compare/4.2.2...4.3.0
288 | [4.2.2]: https://github.com/thephpleague/html-to-markdown/compare/4.2.1...4.2.2
289 | [4.2.1]: https://github.com/thephpleague/html-to-markdown/compare/4.2.0...4.2.1
290 | [4.2.0]: https://github.com/thephpleague/html-to-markdown/compare/4.1.1...4.2.0
291 | [4.1.1]: https://github.com/thephpleague/html-to-markdown/compare/4.1.0...4.1.1
292 | [4.1.0]: https://github.com/thephpleague/html-to-markdown/compare/4.0.1...4.1.0
293 | [4.0.1]: https://github.com/thephpleague/html-to-markdown/compare/4.0.0...4.0.1
294 | [4.0.0]: https://github.com/thephpleague/html-to-markdown/compare/3.1.1...4.0.0
295 | [3.1.1]: https://github.com/thephpleague/html-to-markdown/compare/3.1.0...3.1.1
296 | [3.1.0]: https://github.com/thephpleague/html-to-markdown/compare/3.0.0...3.1.0
297 | [3.0.0]: https://github.com/thephpleague/html-to-markdown/compare/2.2.2...3.0.0
298 | [2.2.2]: https://github.com/thephpleague/html-to-markdown/compare/2.2.1...2.2.2
299 | [2.2.1]: https://github.com/thephpleague/html-to-markdown/compare/2.2.0...2.2.1
300 | [2.2.0]: https://github.com/thephpleague/html-to-markdown/compare/2.1.2...2.2.0
301 | [2.1.2]: https://github.com/thephpleague/html-to-markdown/compare/2.1.1...2.1.2
302 | [2.1.1]: https://github.com/thephpleague/html-to-markdown/compare/2.1.0...2.1.1
303 | [2.1.0]: https://github.com/thephpleague/html-to-markdown/compare/2.0.1...2.1.0
304 | [2.0.1]: https://github.com/thephpleague/html-to-markdown/compare/2.0.0...2.0.1
305 | [2.0.0]: https://github.com/thephpleague/html-to-markdown/compare/775f91e...2.0.0
306 |
307 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Code of Conduct
2 |
3 | As contributors and maintainers of this project, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
4 |
5 | We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, or nationality.
6 |
7 | Examples of unacceptable behavior by participants include:
8 |
9 | * The use of sexualized language or imagery
10 | * Personal attacks
11 | * Trolling or insulting/derogatory comments
12 | * Public or private harassment
13 | * Publishing others' private information, such as physical or electronic addresses, without explicit permission
14 | * Other unethical or unprofessional conduct.
15 |
16 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. By adopting this Code of Conduct, project maintainers commit themselves to fairly and consistently applying these principles to every aspect of managing this project. Project maintainers who do not follow or enforce the Code of Conduct may be permanently removed from the project team.
17 |
18 | This code of conduct applies both within project spaces and in public spaces when an individual is representing the project or its community.
19 |
20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
21 |
22 | This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.2.0, available at [http://contributor-covenant.org/version/1/2/0/](http://contributor-covenant.org/version/1/2/0/)
23 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Colin O'Dell
4 |
5 | Originally created by Nick Cernis
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | this software and associated documentation files (the "Software"), to deal in
9 | the Software without restriction, including without limitation the rights to
10 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11 | the Software, and to permit persons to whom the Software is furnished to do so,
12 | subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
19 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
20 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/README.md:
--------------------------------------------------------------------------------
1 | HTML To Markdown for PHP
2 | ========================
3 |
4 | [](https://gitter.im/thephpleague/html-to-markdown?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
5 |
6 | [](https://packagist.org/packages/league/html-to-markdown)
7 | [](LICENSE)
8 | [](https://travis-ci.org/thephpleague/html-to-markdown)
9 | [](https://scrutinizer-ci.com/g/thephpleague/html-to-markdown/code-structure)
10 | [](https://scrutinizer-ci.com/g/thephpleague/html-to-markdown)
11 | [](https://packagist.org/packages/league/html-to-markdown)
12 |
13 | Library which converts HTML to [Markdown](http://daringfireball.net/projects/markdown/) for your sanity and convenience.
14 |
15 |
16 | **Requires**: PHP 5.3+ or PHP 7.0+
17 |
18 | **Lead Developer**: [@colinodell](http://twitter.com/colinodell)
19 |
20 | **Original Author**: [@nickcernis](http://twitter.com/nickcernis)
21 |
22 |
23 | ### Why convert HTML to Markdown?
24 |
25 | *"What alchemy is this?"* you mutter. *"I can see why you'd convert [Markdown to HTML](https://github.com/thephpleague/commonmark),"* you continue, already labouring the question somewhat, *"but why go the other way?"*
26 |
27 | Typically you would convert HTML to Markdown if:
28 |
29 | 1. You have an existing HTML document that needs to be edited by people with good taste.
30 | 2. You want to store new content in HTML format but edit it as Markdown.
31 | 3. You want to convert HTML email to plain text email.
32 | 4. You know a guy who's been converting HTML to Markdown for years, and now he can speak Elvish. You'd quite like to be able to speak Elvish.
33 | 5. You just really like Markdown.
34 |
35 | ### How to use it
36 |
37 | Require the library by issuing this command:
38 |
39 | ```bash
40 | composer require league/html-to-markdown
41 | ```
42 |
43 | Add `require 'vendor/autoload.php';` to the top of your script.
44 |
45 | Next, create a new HtmlConverter instance, passing in your valid HTML code to its `convert()` function:
46 |
47 | ```php
48 | use League\HTMLToMarkdown\HtmlConverter;
49 |
50 | $converter = new HtmlConverter();
51 |
52 | $html = "<h3>Quick, to the Batpoles!</h3>";
53 | $markdown = $converter->convert($html);
54 | ```
55 |
56 | The `$markdown` variable now contains the Markdown version of your HTML as a string:
57 |
58 | ```php
59 | echo $markdown; // ==> ### Quick, to the Batpoles!
60 | ```
61 |
62 | The included `demo` directory contains an HTML->Markdown conversion form to try out.
63 |
64 | ### Conversion options
65 |
66 | By default, HTML To Markdown preserves HTML tags without Markdown equivalents, like `<span>` and `<div>`.
67 |
68 | To strip HTML tags that don't have a Markdown equivalent while preserving the content inside them, set `strip_tags` to true, like this:
69 |
70 | ```php
71 | $converter = new HtmlConverter(array('strip_tags' => true));
72 |
73 | $html = 'Turnips!';
74 | $markdown = $converter->convert($html); // $markdown now contains "Turnips!"
75 | ```
76 |
77 | Or more explicitly, like this:
78 |
79 | ```php
80 | $converter = new HtmlConverter();
81 | $converter->getConfig()->setOption('strip_tags', true);
82 |
83 | $html = 'Turnips!';
84 | $markdown = $converter->convert($html); // $markdown now contains "Turnips!"
85 | ```
86 |
87 | Note that only the tags themselves are stripped, not the content they hold.
88 |
89 | To strip tags and their content, pass a space-separated list of tags in `remove_nodes`, like this:
90 |
91 | ```php
92 | $converter = new HtmlConverter(array('remove_nodes' => 'span div'));
93 |
94 | $html = 'Turnips!Monkeys!';
95 | $markdown = $converter->convert($html); // $markdown now contains ""
96 | ```
97 |
98 | By default, all comments are stripped from the content. To preserve them, use the `preserve_comments` option, like this:
99 |
100 | ```php
101 | $converter = new HtmlConverter(array('preserve_comments' => true));
102 |
103 | $html = 'Turnips!';
104 | $markdown = $converter->convert($html); // $markdown now contains "Turnips!"
105 | ```
106 |
107 | To preserve only specific comments, set `preserve_comments` with an array of strings, like this:
108 |
109 | ```php
110 | $converter = new HtmlConverter(array('preserve_comments' => array('Eggs!')));
111 |
112 | $html = 'Turnips!';
113 | $markdown = $converter->convert($html); // $markdown now contains "Turnips!"
114 | ```
115 |
116 | ### Style options
117 |
118 | By default bold tags are converted using the asterisk syntax, and italic tags are converted using the underscore syntax. Change these by using the `bold_style` and `italic_style` options.
119 |
120 | ```php
121 | $converter = new HtmlConverter();
122 | $converter->getConfig()->setOption('italic_style', '*');
123 | $converter->getConfig()->setOption('bold_style', '__');
124 |
125 | $html = 'Italic and a bold';
126 | $markdown = $converter->convert($html); // $markdown now contains "*Italic* and a __bold__"
127 | ```
128 |
129 | ### Line break options
130 |
131 | By default, `br` tags are converted to two spaces followed by a newline character as per [traditional Markdown](https://daringfireball.net/projects/markdown/syntax#p). Set `hard_break` to `true` to omit the two spaces, as per GitHub Flavored Markdown (GFM).
132 |
133 | ```php
134 | $converter = new HtmlConverter();
135 | $html = '<p>test<br>line break</p>';
136 |
137 | $converter->getConfig()->setOption('hard_break', true);
138 | $markdown = $converter->convert($html); // $markdown now contains "test\nline break"
139 |
140 | $converter->getConfig()->setOption('hard_break', false); // default
141 | $markdown = $converter->convert($html); // $markdown now contains "test \nline break"
142 | ```
143 |
144 | ### Autolinking options
145 |
146 | By default, `a` tags are converted to the easiest possible link syntax, i.e. if no text or title is available, then the `<url>` syntax will be used rather than the full `[url](url)` syntax. Set `use_autolinks` to `false` to change this behavior to always use the full link syntax.
147 |
148 | ```php
149 | $converter = new HtmlConverter();
150 | $html = '<p><a href="https://google.com">https://google.com</a></p>';
151 |
152 | $converter->getConfig()->setOption('use_autolinks', true); // default
153 | $markdown = $converter->convert($html); // $markdown now contains "<https://google.com>"
154 |
155 | $converter->getConfig()->setOption('use_autolinks', false);
156 | $markdown = $converter->convert($html); // $markdown now contains "[https://google.com](https://google.com)"
157 | ```
158 |
159 | ### Passing custom Environment object
160 |
161 | You can pass a custom `Environment` object to control, for example, which converters are used.
162 |
163 | ```php
164 | $environment = new Environment(array(
165 | // your configuration here
166 | ));
167 | $environment->addConverter(new HeaderConverter()); // optionally - add converter manually
168 |
169 | $converter = new HtmlConverter($environment);
170 |
171 | $html = '<h3>Header</h3>
172 | <img src="test.png" />
173 | ';
174 | $markdown = $converter->convert($html); // $markdown now contains "### Header" and "<img src="test.png" />"
175 | ```
176 |
177 | ### Limitations
178 |
179 | - Markdown Extra, MultiMarkdown and other variants aren't supported – just Markdown.
180 |
181 | ### Known issues
182 |
183 | - Nested lists and lists containing multiple paragraphs aren't converted correctly.
184 | - Lists inside blockquotes aren't converted correctly.
185 | - Any reported [open issues here](https://github.com/thephpleague/html-to-markdown/issues?state=open).
186 |
187 | [Report your issue or request a feature here.](https://github.com/thephpleague/html-to-markdown/issues/new) Issues with patches or failing tests are especially welcome.
188 |
189 | ### Style notes
190 |
191 | - Setext (underlined) headers are the default for H1 and H2. If you prefer the ATX style for H1 and H2 (# Header 1 and ## Header 2), set `header_style` to 'atx' in the options array when you instantiate the object:
192 |
193 | `$converter = new HtmlConverter(array('header_style'=>'atx'));`
194 |
195 | Headers of H3 priority and lower always use atx style.
196 |
197 | - Links and images are referenced inline. Footnote references (where image src and anchor href attributes are listed in the footnotes) are not used.
198 | - Blockquotes aren't line wrapped – it makes the converted Markdown easier to edit.
199 |
200 | ### Dependencies
201 |
202 | HTML To Markdown requires PHP's [xml](http://www.php.net/manual/en/xml.installation.php), [libxml](http://www.php.net/manual/en/libxml.installation.php), and [dom](http://www.php.net/manual/en/dom.installation.php) extensions, all of which are enabled by default on most distributions.
203 |
204 | Errors such as "Fatal error: Class 'DOMDocument' not found" on distributions such as CentOS that disable PHP's xml extension can be resolved by installing php-xml.
205 |
206 | ### Contributors
207 |
208 | Many thanks to all [contributors](https://github.com/thephpleague/html-to-markdown/graphs/contributors) so far. Further improvements and feature suggestions are very welcome.
209 |
210 | ### How it works
211 |
212 | HTML To Markdown creates a DOMDocument from the supplied HTML, walks through the tree, and converts each node to a text node containing the equivalent markdown, starting from the most deeply nested node and working outwards towards the root node.
213 |
214 | ### To-do
215 |
216 | - Support for nested lists and lists inside blockquotes.
217 | - Offer an option to preserve tags as HTML if they contain attributes that can't be represented with Markdown (e.g. `style`).
218 |
219 | ### Trying to convert Markdown to HTML?
220 |
221 | Use one of these great libraries:
222 |
223 | - [league/commonmark](https://github.com/thephpleague/commonmark) (recommended)
224 | - [cebe/markdown](https://github.com/cebe/markdown)
225 | - [PHP Markdown](https://michelf.ca/projects/php-markdown/)
226 | - [Parsedown](https://github.com/erusev/parsedown)
227 |
228 | No guarantees about the Elvish, though.
229 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/bin/html-to-markdown:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | $arg) {
9 | if ($i === 0) {
10 | continue;
11 | }
12 |
13 | if (substr($arg, 0, 1) === '-') {
14 | switch ($arg) {
15 | case '-h':
16 | case '--help':
17 | echo getHelpText();
18 | exit(0);
19 | default:
20 | fail('Unknown option: ' . $arg);
21 | }
22 | } else {
23 | $src = $argv[1];
24 | }
25 | }
26 |
27 | if (isset($src)) {
28 | if (!file_exists($src)) {
29 | fail('File not found: ' . $src);
30 | }
31 |
32 | $html = file_get_contents($src);
33 | } else {
34 | $stdin = fopen('php://stdin', 'r');
35 | stream_set_blocking($stdin, false);
36 | $html = stream_get_contents($stdin);
37 | fclose($stdin);
38 |
39 | if (empty($html)) {
40 | fail(getHelpText());
41 | }
42 | }
43 |
44 |
45 | $converter = new League\HTMLToMarkdown\HtmlConverter();
46 | echo $converter->convert($html);
47 |
48 | /**
49 | * Get help and usage info
50 | *
51 | * @return string
52 | */
53 | function getHelpText()
54 | {
55 | return << output.md
73 |
74 | Converting from STDIN:
75 |
76 | echo -e 'Hello World!
' | html-to-markdown
77 |
78 | Converting from STDIN and saving the output:
79 |
80 | echo -e 'Hello World!
' | html-to-markdown > output.md
81 |
82 | HELP;
83 | }
84 |
85 | /**
86 | * @param string $message Error message
87 | */
88 | function fail($message)
89 | {
90 | fwrite(STDERR, $message . "\n");
91 | exit(1);
92 | }
93 |
94 | /** Loads the first Composer autoloader that exists; exits with an error when none is found. */
95 | function requireAutoloader()
96 | {
97 |     $autoloadPaths = array(
98 |         __DIR__ . '/../vendor/autoload.php', // Local package usage
99 |         __DIR__ . '/../../../autoload.php',  // Package was included as a library
100 |     );
101 |     foreach ($autoloadPaths as $path) {
102 |         if (file_exists($path)) {
103 |             require_once $path;
104 |             return;
105 |         }
106 |     }
107 |     fail('Composer autoloader not found; run "composer install" first.');
108 | }
109 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "league/html-to-markdown",
3 | "type": "library",
4 | "description": "An HTML-to-markdown conversion helper for PHP",
5 | "keywords": ["markdown", "html"],
6 | "homepage": "https://github.com/thephpleague/html-to-markdown",
7 | "license": "MIT",
8 | "authors": [
9 | {
10 | "name": "Colin O'Dell",
11 | "email": "colinodell@gmail.com",
12 | "homepage": "https://www.colinodell.com",
13 | "role": "Lead Developer"
14 | },
15 | {
16 | "name": "Nick Cernis",
17 | "email": "nick@cern.is",
18 | "homepage": "http://modernnerd.net",
19 | "role": "Original Author"
20 | }
21 | ],
22 | "autoload": {
23 | "psr-4": {
24 | "League\\HTMLToMarkdown\\": "src/"
25 | }
26 | },
27 | "autoload-dev": {
28 | "psr-4": {
29 | "League\\HTMLToMarkdown\\Test\\": "tests"
30 | }
31 | },
32 | "require": {
33 | "php": ">=5.3.3",
34 | "ext-dom": "*",
35 | "ext-xml": "*"
36 | },
37 | "require-dev": {
38 | "mikehaertl/php-shellcommand": "~1.1.0",
39 | "phpunit/phpunit": "^4.8|^5.7",
40 | "scrutinizer/ocular": "~1.1"
41 | },
42 | "bin": ["bin/html-to-markdown"],
43 | "extra": {
44 | "branch-alias": {
45 | "dev-master": "4.10-dev"
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Configuration.php:
--------------------------------------------------------------------------------
1 | config = $config;
15 |
16 | $this->checkForDeprecatedOptions($config);
17 | }
18 |
19 | /**
20 | * @param array $config
21 | */
22 | public function merge(array $config = array())
23 | {
24 | $this->checkForDeprecatedOptions($config);
25 | $this->config = array_replace_recursive($this->config, $config);
26 | }
27 |
28 | /**
29 | * @param array $config
30 | */
31 | public function replace(array $config = array())
32 | {
33 | $this->checkForDeprecatedOptions($config);
34 | $this->config = $config;
35 | }
36 |
37 | /**
38 | * @param string $key
39 | * @param mixed $value
40 | */
41 | public function setOption($key, $value)
42 | {
43 | $this->checkForDeprecatedOptions(array($key => $value));
44 | $this->config[$key] = $value;
45 | }
46 |
47 | /**
48 | * @param string|null $key
49 | * @param mixed|null $default
50 | *
51 | * @return mixed|null
52 | */
53 | public function getOption($key = null, $default = null)
54 | {
55 | if ($key === null) {
56 | return $this->config;
57 | }
58 |
59 | if (!isset($this->config[$key])) {
60 | return $default;
61 | }
62 |
63 | return $this->config[$key];
64 | }
65 |
66 | private function checkForDeprecatedOptions(array $config)
67 | {
68 | foreach ($config as $key => $value) {
69 | if ($key === 'bold_style' && $value !== '**') {
70 | @trigger_error('Customizing the bold_style option is deprecated and may be removed in the next major version', E_USER_DEPRECATED);
71 | } elseif ($key === 'italic_style' && $value !== '*') {
72 | @trigger_error('Customizing the italic_style option is deprecated and may be removed in the next major version', E_USER_DEPRECATED);
73 | }
74 | }
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/ConfigurationAwareInterface.php:
--------------------------------------------------------------------------------
1 | ' symbols to each line.
18 |
19 | $markdown = '';
20 |
21 | $quote_content = trim($element->getValue());
22 |
23 | $lines = preg_split('/\r\n|\r|\n/', $quote_content);
24 |
25 | $total_lines = count($lines);
26 |
27 | foreach ($lines as $i => $line) {
28 | $markdown .= '> ' . $line . "\n";
29 | if ($i + 1 === $total_lines) {
30 | $markdown .= "\n";
31 | }
32 | }
33 |
34 | return $markdown;
35 | }
36 |
37 | /**
38 | * @return string[]
39 | */
40 | public function getSupportedTags()
41 | {
42 | return array('blockquote');
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/CodeConverter.php:
--------------------------------------------------------------------------------
1 | getAttribute('class');
20 |
21 | if ($classes) {
22 | // Since tags can have more than one class, we need to find the one that starts with 'language-'
23 | $classes = explode(' ', $classes);
24 | foreach ($classes as $class) {
25 | if (strpos($class, 'language-') !== false) {
26 | // Found one, save it as the selected language and stop looping over the classes.
27 | $language = str_replace('language-', '', $class);
28 | break;
29 | }
30 | }
31 | }
32 |
33 | $markdown = '';
34 | $code = html_entity_decode($element->getChildrenAsString());
35 |
36 | // In order to remove the code tags we need to search for them and, in the case of the opening tag
37 | // use a regular expression to find the tag and the other attributes it might have
38 | $code = preg_replace('/]*>/', '', $code);
39 | $code = str_replace('
', '', $code);
40 |
41 | // Checking if it's a code block or span
42 | if ($this->shouldBeBlock($element, $code)) {
43 | // Code block detected, newlines will be added in parent
44 | $markdown .= '```' . $language . "\n" . $code . "\n" . '```';
45 | } else {
46 | // One line of code, wrapping it on one backtick, removing new lines
47 | $markdown .= '`' . preg_replace('/\r\n|\r|\n/', '', $code) . '`';
48 | }
49 |
50 | return $markdown;
51 | }
52 |
53 | /**
54 | * @return string[]
55 | */
56 | public function getSupportedTags()
57 | {
58 | return array('code');
59 | }
60 |
61 | /**
62 | * @param ElementInterface $element
63 | * @param string $code
64 | *
65 | * @return bool
66 | */
67 | private function shouldBeBlock(ElementInterface $element, $code)
68 | {
69 | if ($element->getParent()->getTagName() == 'pre') {
70 | return true;
71 | }
72 |
73 | if (preg_match('/[^\s]` `/', $code)) {
74 | return true;
75 | }
76 |
77 | return false;
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/CommentConverter.php:
--------------------------------------------------------------------------------
1 | config = $config;
22 | }
23 |
24 |     /**
25 |      * @param ElementInterface $element Comment node (this converter handles the "#comment" tag)
26 |      *
27 |      * @return string The comment wrapped in its delimiters again when it is preserved, otherwise ''
28 |      */
29 |     public function convert(ElementInterface $element)
30 |     {
31 |         if ($this->shouldPreserve($element)) {
32 |             return '<!--' . $element->getValue() . '-->';
33 |         }
34 |         return '';
35 |     }
36 |
37 | /**
38 | * @return string[]
39 | */
40 | public function getSupportedTags()
41 | {
42 | return array('#comment');
43 | }
44 |
45 |     /**
46 |      * @param ElementInterface $element
47 |      *
48 |      * @return bool True when preserve_comments is true, or is an array containing the trimmed comment text
49 |      */
50 |     private function shouldPreserve(ElementInterface $element)
51 |     {
52 |         $preserve = $this->config->getOption('preserve_comments');
53 |         if ($preserve === true) {
54 |             return true;
55 |         }
56 |         if (!is_array($preserve)) {
57 |             return false;
58 |         }
59 |         // Strict match: a loose in_array() lets a non-string entry such as true match any non-empty comment.
60 |         return in_array(trim($element->getValue()), $preserve, true);
61 |     }
62 | }
63 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/ConverterInterface.php:
--------------------------------------------------------------------------------
1 | config = $config;
24 | }
25 |
26 | /**
27 | * @param ElementInterface $element Element whose tag has no dedicated converter
28 | *
29 | * @return string The element's value when strip_tags is on, otherwise its entity-decoded children string
30 | */
31 | public function convert(ElementInterface $element)
32 | {
33 | // With strip_tags enabled, return only the element's value so the tag itself is dropped.
34 | // Otherwise (strip_tags defaults to false here) tags without a Markdown equivalent are kept;
35 | // presumably getChildrenAsString() serialises via C14N(): http://www.php.net/manual/en/domnode.c14n.php
36 | if ($this->config->getOption('strip_tags', false)) {
37 | return $element->getValue();
38 | }
39 |
40 | $markdown = html_entity_decode($element->getChildrenAsString());
41 |
42 | if ($element->getTagName() === 'table') {
43 | $markdown .= "\n\n";
44 | }
45 |
46 | return $markdown;
47 | }
48 |
49 | /**
50 | * @return string[]
51 | */
52 | public function getSupportedTags()
53 | {
54 | return array(self::DEFAULT_CONVERTER);
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/DivConverter.php:
--------------------------------------------------------------------------------
1 | config = $config;
22 | }
23 |
24 | /**
25 | * @param ElementInterface $element
26 | *
27 | * @return string
28 | */
29 | public function convert(ElementInterface $element)
30 | {
31 | if ($this->config->getOption('strip_tags', false)) {
32 | return $element->getValue() . "\n\n";
33 | }
34 |
35 | return html_entity_decode($element->getChildrenAsString());
36 | }
37 |
38 | /**
39 | * @return string[]
40 | */
41 | public function getSupportedTags()
42 | {
43 | return array('div');
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php:
--------------------------------------------------------------------------------
1 | config = $config;
22 | }
23 |
24 |     /**
25 |      * Wraps the element's text in the configured italic (em, i) or bold (strong, b) marker.
26 |      *
27 |      * @param ElementInterface $element
28 |      *
29 |      * @return string The value unchanged when it is empty or whitespace-only, otherwise the marked-up text
30 |      */
31 |     public function convert(ElementInterface $element)
32 |     {
33 |         $tag = $element->getTagName();
34 |         $value = $element->getValue();
35 |         // Compare with '' instead of testing truthiness: the string "0" is falsy but is real content.
36 |         if (trim($value) === '') {
37 |             return $value;
38 |         }
39 |
40 |         $isItalic = $tag === 'i' || $tag === 'em';
41 |         $style = $this->config->getOption($isItalic ? 'italic_style' : 'bold_style');
42 |
43 |         // Leading/trailing whitespace is moved outside the markers as a single space per side.
44 |         $prefix = ltrim($value) !== $value ? ' ' : '';
45 |         $suffix = rtrim($value) !== $value ? ' ' : '';
46 |
47 |         return $prefix . $style . trim($value) . $style . $suffix;
48 |     }
49 |
50 | /**
51 | * @return string[]
52 | */
53 | public function getSupportedTags()
54 | {
55 | return array('em', 'i', 'strong', 'b');
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php:
--------------------------------------------------------------------------------
1 | config = $config;
22 | }
23 |
24 | /**
25 | * @param ElementInterface $element
26 | *
27 | * @return string
28 | */
29 | public function convert(ElementInterface $element)
30 | {
31 | $return = $this->config->getOption('hard_break') ? "\n" : " \n";
32 |
33 | $next = $element->getNext();
34 | if ($next) {
35 | $next_value = $next->getValue();
36 | if ($next_value) {
37 | if (in_array(substr($next_value, 0, 2), array('- ', '* ', '+ '))) {
38 | $parent = $element->getParent();
39 | if ($parent && $parent->getTagName() == 'li') {
40 | $return .= '\\';
41 | }
42 | }
43 | }
44 | }
45 |
46 | return $return;
47 | }
48 |
49 | /**
50 | * @return string[]
51 | */
52 | public function getSupportedTags()
53 | {
54 | return array('br');
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php:
--------------------------------------------------------------------------------
1 | config = $config;
25 | }
26 |
27 | /**
28 | * @param ElementInterface $element
29 | *
30 | * @return string
31 | */
32 | public function convert(ElementInterface $element)
33 | {
34 | $level = (int) substr($element->getTagName(), 1, 1);
35 | $style = $this->config->getOption('header_style', self::STYLE_SETEXT);
36 |
37 | if (strlen($element->getValue()) === 0) {
38 | return "\n";
39 | }
40 |
41 | if (($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) {
42 | return $this->createSetextHeader($level, $element->getValue());
43 | }
44 |
45 | return $this->createAtxHeader($level, $element->getValue());
46 | }
47 |
48 | /**
49 | * @return string[]
50 | */
51 | public function getSupportedTags()
52 | {
53 | return array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
54 | }
55 |
56 | /**
57 | * @param int $level
58 | * @param string $content
59 | *
60 | * @return string
61 | */
62 | private function createSetextHeader($level, $content)
63 | {
64 | $length = function_exists('mb_strlen') ? mb_strlen($content, 'utf-8') : strlen($content);
65 | $underline = ($level === 1) ? '=' : '-';
66 |
67 | return $content . "\n" . str_repeat($underline, $length) . "\n\n";
68 | }
69 |
70 | /**
71 | * @param int $level
72 | * @param string $content
73 | *
74 | * @return string
75 | */
76 | private function createAtxHeader($level, $content)
77 | {
78 | $prefix = str_repeat('#', $level) . ' ';
79 |
80 | return $prefix . $content . "\n\n";
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php:
--------------------------------------------------------------------------------
1 | getAttribute('src');
17 | $alt = $element->getAttribute('alt');
18 | $title = $element->getAttribute('title');
19 |
20 | if ($title !== '') {
21 | // No newlines added.
should be in a block-level element.
22 | return '';
23 | }
24 |
25 | return '';
26 | }
27 |
28 | /**
29 | * @return string[]
30 | */
31 | public function getSupportedTags()
32 | {
33 | return array('img');
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/LinkConverter.php:
--------------------------------------------------------------------------------
1 | config = $config;
21 | }
22 |
23 | /**
24 | * @param ElementInterface $element
25 | *
26 | * @return string
27 | */
28 | public function convert(ElementInterface $element)
29 | {
30 | $href = $element->getAttribute('href');
31 | $title = $element->getAttribute('title');
32 | $text = trim($element->getValue(), "\t\n\r\0\x0B");
33 |
34 | if ($title !== '') {
35 | $markdown = '[' . $text . '](' . $href . ' "' . $title . '")';
36 | } elseif ($href === $text && $this->isValidAutolink($href)) {
37 | $markdown = '<' . $href . '>';
38 | } elseif ($href === 'mailto:' . $text && $this->isValidEmail($text)) {
39 | $markdown = '<' . $text . '>';
40 | } else {
41 | if (stristr($href, ' ')) {
42 | $href = '<'.$href.'>';
43 | }
44 | $markdown = '[' . $text . '](' . $href . ')';
45 | }
46 |
47 | if (!$href) {
48 | $markdown = html_entity_decode($element->getChildrenAsString());
49 | }
50 |
51 | return $markdown;
52 | }
53 |
54 | /**
55 | * @return string[]
56 | */
57 | public function getSupportedTags()
58 | {
59 | return array('a');
60 | }
61 |
62 | /**
63 | * @param string $href
64 | *
65 | * @return bool
66 | */
67 | private function isValidAutolink($href)
68 | {
69 | $useAutolinks = $this->config->getOption('use_autolinks');
70 | return $useAutolinks && (preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*/i', $href) === 1);
71 | }
72 |
73 |     /**
74 |      * @param string $email Candidate address (the link text of a mailto: anchor)
75 |      *
76 |      * @return bool True when $email passes PHP's FILTER_VALIDATE_EMAIL check
77 |      */
78 |     private function isValidEmail($email)
79 |     {
80 |         // filter_var() yields the filtered string or false; compare so a real bool is returned as documented
81 |         return filter_var($email, FILTER_VALIDATE_EMAIL) !== false;
82 |     }
83 | }
84 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php:
--------------------------------------------------------------------------------
1 | getValue() . "\n";
17 | }
18 |
19 | /**
20 | * @return string[]
21 | */
22 | public function getSupportedTags()
23 | {
24 | return array('ol', 'ul');
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/ListItemConverter.php:
--------------------------------------------------------------------------------
1 | config = $config;
27 | }
28 |
29 |     /**
30 |      * @param ElementInterface $element List item; its parent's tag name (ul/ol) selects bullet or number
31 |      *
32 |      * @return string The item's Markdown, newline-terminated; ordered lists honour a numeric start attribute
33 |      */
34 |     public function convert(ElementInterface $element)
35 |     {
36 |         // If parent is an ol, use numbers, otherwise, use dashes
37 |         $list_type = $element->getParent()->getTagName();
38 |
39 |         // Add spaces to start for nested list items
40 |         $level = $element->getListItemLevel($element);
41 |
42 |         $prefixForParagraph = str_repeat(' ', $level + 1);
43 |         $value = trim(implode("\n" . $prefixForParagraph, explode("\n", trim($element->getValue()))));
44 |
45 |         // If list item is the first in a nested list, add a newline before it
46 |         $prefix = '';
47 |         if ($level > 0 && $element->getSiblingPosition() === 1) {
48 |             $prefix = "\n";
49 |         }
50 |
51 |         if ($list_type === 'ul') {
52 |             $list_item_style = $this->config->getOption('list_item_style', '-');
53 |             $list_item_style_alternate = $this->config->getOption('list_item_style_alternate');
54 |             if (!isset($this->listItemStyle)) {
55 |                 $this->listItemStyle = $list_item_style_alternate ? $list_item_style_alternate : $list_item_style;
56 |             }
57 |             // The first item of each top-level list flips between the primary and the alternate bullet
58 |             if ($list_item_style_alternate && $level == 0 && $element->getSiblingPosition() === 1) {
59 |                 $this->listItemStyle = $this->listItemStyle == $list_item_style ? $list_item_style_alternate : $list_item_style;
60 |             }
61 |
62 |             return $prefix . $this->listItemStyle . ' ' . $value . "\n";
63 |         }
64 |         // is_numeric() rather than truthiness: start="0" is honoured and a non-numeric start never reaches the addition
65 |         if ($list_type === 'ol' && is_numeric($start = $element->getParent()->getAttribute('start'))) {
66 |             $number = (int) $start + $element->getSiblingPosition() - 1;
67 |         } else {
68 |             $number = $element->getSiblingPosition();
69 |         }
70 |
71 |         return $prefix . $number . '. ' . $value . "\n";
72 |     }
73 |
74 | /**
75 | * @return string[]
76 | */
77 | public function getSupportedTags()
78 | {
79 | return array('li');
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php:
--------------------------------------------------------------------------------
1 | getValue();
17 |
18 | $markdown = '';
19 |
20 | $lines = preg_split('/\r\n|\r|\n/', $value);
21 | foreach ($lines as $line) {
22 | /*
23 | * Some special characters need to be escaped based on the position that they appear
24 | * The following function will deal with those special cases.
25 | */
26 | $markdown .= $this->escapeSpecialCharacters($line);
27 | $markdown .= "\n";
28 | }
29 |
30 | return trim($markdown) !== '' ? rtrim($markdown) . "\n\n" : '';
31 | }
32 |
33 | /**
34 | * @return string[]
35 | */
36 | public function getSupportedTags()
37 | {
38 | return array('p');
39 | }
40 |
41 | /**
42 | * @param string $line
43 | *
44 | * @return string
45 | */
46 | private function escapeSpecialCharacters($line)
47 | {
48 | $line = $this->escapeFirstCharacters($line);
49 | $line = $this->escapeOtherCharacters($line);
50 | $line = $this->escapeOtherCharactersRegex($line);
51 |
52 | return $line;
53 | }
54 |
55 | /**
56 | * @param string $line
57 | *
58 | * @return string
59 | */
60 | private function escapeFirstCharacters($line)
61 | {
62 | $escapable = array(
63 | '>',
64 | '- ',
65 | '+ ',
66 | '--',
67 | '~~~',
68 | '---',
69 | '- - -'
70 | );
71 |
72 | foreach ($escapable as $i) {
73 | if (strpos(ltrim($line), $i) === 0) {
74 | // Found a character that must be escaped, adding a backslash before
75 | return '\\' . ltrim($line);
76 | }
77 | }
78 |
79 | return $line;
80 | }
81 |
82 | /**
83 | * @param string $line
84 | *
85 | * @return string
86 | */
87 | private function escapeOtherCharacters($line)
88 | {
89 | $escapable = array(
90 | '