├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── ROADMAP.md ├── _typos.toml ├── composer.json ├── phpbench.json ├── phpcs.xml ├── phpstan.neon ├── phpunit.xml ├── sonar-project.properties ├── src ├── bin │ ├── class-file-operations.php │ ├── class-pattern-converter.php │ ├── patterns.json │ ├── update-iana.php │ └── update-patterns.php ├── class-dom.php ├── class-hyphenator.php ├── class-php-typography.php ├── class-re.php ├── class-settings.php ├── class-strings.php ├── class-text-parser.php ├── class-u.php ├── diacritics │ ├── de-DE.json │ └── en-US.json ├── exceptions │ ├── class-invalid-encoding-exception.php │ ├── class-invalid-file-exception.php │ ├── class-invalid-hyphenation-pattern-file-exception.php │ ├── class-invalid-json-exception.php │ ├── class-invalid-path-exception.php │ └── class-invalid-style-exception.php ├── fixes │ ├── class-default-registry.php │ ├── class-node-fix.php │ ├── class-registry.php │ ├── class-token-fix.php │ ├── node-fixes │ │ ├── class-abstract-node-fix.php │ │ ├── class-classes-dependent-fix.php │ │ ├── class-dash-spacing-fix.php │ │ ├── class-dewidow-fix.php │ │ ├── class-french-punctuation-spacing-fix.php │ │ ├── class-numbered-abbreviation-spacing-fix.php │ │ ├── class-process-words-fix.php │ │ ├── class-simple-regex-replacement-fix.php │ │ ├── class-simple-style-fix.php │ │ ├── class-single-character-word-spacing-fix.php │ │ ├── class-smart-area-units-fix.php │ │ ├── class-smart-dashes-fix.php │ │ ├── class-smart-diacritics-fix.php │ │ ├── class-smart-ellipses-fix.php │ │ ├── class-smart-exponents-fix.php │ │ ├── class-smart-fractions-fix.php │ │ ├── class-smart-marks-fix.php │ │ ├── class-smart-maths-fix.php │ │ ├── class-smart-ordinal-suffix-fix.php │ │ ├── class-smart-quotes-fix.php │ │ ├── class-space-collapse-fix.php │ │ ├── class-style-ampersands-fix.php │ │ ├── class-style-caps-fix.php │ │ ├── class-style-hanging-punctuation-fix.php │ │ ├── class-style-initial-quotes-fix.php │ │ ├── class-style-numbers-fix.php │ │ ├── class-unicode-remapping-fix.php │ │ └── class-unit-spacing-fix.php │ └── token-fixes │ │ ├── class-abstract-token-fix.php │ │ ├── class-hyphenate-compounds-fix.php │ │ ├── class-hyphenate-fix.php │ │ ├── class-smart-dashes-hyphen-fix.php │ │ ├── class-wrap-emails-fix.php │ │ ├── class-wrap-hard-hyphens-fix.php │ │ └── class-wrap-urls-fix.php ├── hyphenator │ ├── class-cache.php │ └── class-trie-node.php ├── lang │ ├── af.json │ ├── am.json │ ├── as.json │ ├── be.json │ ├── bg.json │ ├── bn.json │ ├── ca.json │ ├── cs.json │ ├── cu.json │ ├── cy.json │ ├── da.json │ ├── de-1901.json │ ├── de-CH-1901.json │ ├── de.json │ ├── el-Mono.json │ ├── el-Poly.json │ ├── en-GB.json │ ├── en-US.json │ ├── eo.json │ ├── es.json │ ├── et.json │ ├── eu.json │ ├── fi.json │ ├── fr.json │ ├── fur.json │ ├── ga.json │ ├── gl.json │ ├── grc.json │ ├── gu.json │ ├── hi.json │ ├── hr.json │ ├── hsb.json │ ├── hu.json │ ├── hy.json │ ├── ia.json │ ├── id.json │ ├── is.json │ ├── it.json │ ├── ka.json │ ├── kmr.json │ ├── kn.json │ ├── la-classic.json │ ├── la-liturgic.json │ ├── la.json │ ├── lt.json │ ├── lv.json │ ├── ml.json │ ├── mn-Cyrl.json │ ├── mr.json │ ├── nb.json │ ├── nl.json │ ├── nn.json │ ├── no.json │ ├── oc.json │ ├── or.json │ ├── pa.json │ ├── pl.json │ ├── pms.json │ ├── pt.json │ ├── rm.json │ ├── ro.json │ ├── ru.json │ ├── sa.json │ ├── sh-Cyrl.json │ ├── sh-Latn.json │ ├── sk.json │ ├── sl.json │ ├── sr-Cyrl.json │ ├── sv.json │ ├── ta.json │ ├── te.json │ ├── th.json │ ├── tk.json │ ├── tr.json │ ├── uk.json │ └── zh-Latn.json ├── settings │ ├── class-dash-style.php │ ├── class-dashes.php │ ├── class-quote-style.php │ ├── class-quotes.php │ ├── class-simple-dashes.php │ └── class-simple-quotes.php └── text-parser │ └── class-token.php └── tests ├── benchmarks ├── class-detect-encoding-bench.php ├── class-dom-bench.php ├── class-functions-bench.php ├── class-html-parser-bench.php ├── class-hyphenator-cache-bench.php ├── class-multibyte-switching-bench.php ├── class-native-string-split.php ├── class-php-typography-bench.php └── data │ └── example1.html ├── bootstrap.php ├── class-dom-test.php ├── class-hyphenator-test.php ├── class-php-typography-css-classes.php ├── class-php-typography-test.php ├── class-re-test.php ├── class-settings-test.php ├── class-strings-test.php ├── class-testcase.php ├── class-text-parser-test.php ├── fixes ├── class-default-registry-test.php ├── class-registry-test.php ├── node-fixes │ ├── class-abstract-node-fix-test.php │ ├── class-classes-dependent-fix-test.php │ ├── class-dash-spacing-fix-test.php │ ├── class-dewidow-fix-test.php │ ├── class-french-punctuation-spacing-fix-test.php │ ├── class-node-fix-testcase.php │ ├── class-numbered-abbreviation-spacing-fix-test.php │ ├── class-process-words-fix-test.php │ ├── class-simple-regex-replacement-fix-test.php │ ├── class-simple-style-fix-test.php │ ├── class-single-character-word-spacing-fix-test.php │ ├── class-smart-area-units-fix-test.php │ ├── class-smart-dashes-fix-test.php │ ├── class-smart-diacritics-fix-test.php │ ├── class-smart-ellipses-fix-test.php │ ├── class-smart-exponents-fix-test.php │ ├── class-smart-fractions-fix-test.php │ ├── class-smart-marks-fix-test.php │ ├── class-smart-maths-fix-test.php │ ├── class-smart-ordinal-suffix-fix-test.php │ ├── class-smart-quotes-fix-test.php │ ├── class-space-collapse-fix-test.php │ ├── class-style-ampersands-fix-test.php │ ├── class-style-caps-fix-test.php │ ├── class-style-hanging-punctuation-fix-test.php │ ├── class-style-initial-quotes-fix-test.php │ ├── class-style-numbers-fix-test.php │ ├── class-unicode-remapping-fix-test.php │ └── class-unit-spacing-fix-test.php └── token-fixes │ ├── class-abstract-token-fix-test.php │ ├── class-hyphenate-compounds-fix-test.php │ ├── class-hyphenate-fix-test.php │ ├── class-smart-dashes-hyphen-fix-test.php │ ├── class-token-fix-testcase.php │ ├── class-wrap-emails-fix-test.php │ ├── class-wrap-hard-hyphens-fix-test.php │ └── class-wrap-urls-fix-test.php ├── hyphenator ├── class-cache-test.php └── class-trie-node-test.php ├── settings ├── class-dash-style-test.php ├── class-dashes-test.php ├── class-quote-style-test.php ├── class-quotes-test.php ├── class-simple-dashes-test.php └── class-simple-quotes-test.php └── text-parser └── class-token-test.php /.gitignore: -------------------------------------------------------------------------------- 1 | # tmp files 2 | *~ 3 | 4 | # Composer 5 | vendor/* 6 | composer.lock 7 | 8 | # Node.js/Grunt 9 | node_modules/* 10 | 11 | # SASS 12 | .sass-cache/* 13 | 14 | # PHPUnit cache 15 | .phpunit.result.cache 16 | 17 | # ignore generated code coverage files 18 | tests/coverage/* 19 | 20 | # ignore dummy PHP file for language names 21 | src/_language_names.php 22 | 23 | # IANA TLD list 24 | src/IANA/* 25 | 26 | # macOS Finder 27 | .DS_Store 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PHP-Typography 2 | 3 | ![Build Status](https://github.com/mundschenk-at/php-typography/actions/workflows/ci.yml/badge.svg) 4 | [![Latest Stable Version](https://poser.pugx.org/mundschenk-at/php-typography/v/stable)](https://packagist.org/packages/mundschenk-at/php-typography) 5 | [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=mundschenk-at_php-typography&metric=alert_status)](https://sonarcloud.io/dashboard?id=mundschenk-at_php-typography) 6 | [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=mundschenk-at_php-typography&metric=coverage)](https://sonarcloud.io/dashboard?id=mundschenk-at_php-typography) 7 | [![License](https://poser.pugx.org/mundschenk-at/php-typography/license)](https://packagist.org/packages/mundschenk-at/php-typography) 8 | 9 | A PHP library for improving your web typography: 10 | 11 | * Hyphenation — over 50 languages supported 12 | * Space control, including: 13 | - widow protection 14 | - gluing values to units 15 | - forced internal wrapping of long URLs & email addresses 16 | * Intelligent character replacement, including smart handling of: 17 | - quote marks (‘single’, “double”) 18 | - dashes ( – ) 19 | - ellipses (…) 20 | - trademarks, copyright & service marks (™ ©) 21 | - math symbols (5×5×5=53) 22 | - fractions (116) 23 | - ordinal suffixes (1st, 2nd) 24 | * CSS hooks for styling: 25 | - ampersands, 26 | - uppercase words, 27 | - numbers, 28 | - initial quotes & guillemets. 29 | 30 | ## Requirements 31 | 32 | * PHP 7.4.0 or above 33 | * The `mbstring` extension 34 | 35 | ## Installation 36 | 37 | The best way to use this package is through Composer: 38 | 39 | ```BASH 40 | $ composer require mundschenk-at/php-typography 41 | $ vendor/bin/update-iana.php 42 | ``` 43 | 44 | ## Basic Usage 45 | 46 | 1. Create a `Settings` object and enable the fixes you want. 47 | 2. Create a `PHP_Typography` instance and use it to process HTML fragments (or 48 | whole documents) using your defined settings. 49 | 50 | ```PHP 51 | $settings = new \PHP_Typography\Settings(); 52 | $settings->set_hyphenation( true ); 53 | $settings->set_hyphenation_language( 'en-US' ); 54 | 55 | $typo = new \PHP_Typography\PHP_Typography(); 56 | 57 | $hyphenated_html = $typo->process( $html_snippet, $settings ); 58 | 59 | ``` 60 | 61 | ## Roadmap 62 | 63 | Please have a look at [ROADMAP](ROADMAP.md) file for upcoming releases. 64 | 65 | ## License 66 | 67 | PHP-Typography is licensed under the GNU General Public License 2 or later - see the [LICENSE](LICENSE) file for details. 68 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | PHP-Typography follows [Semantic Versioning 2.0.0](https://semver.org/spec/v2.0.0.html), i.e. each release is numbered `MAJOR.MINOR.PATCH`. 4 | * `MAJOR` version is incremented when there are incompatible API changes (not necessarily huge, just not backwards compatible). 5 | * `MINOR` version is incremented when a release adds backwards-compatible features. 6 | * `PATCH` level is incremented for backwards-compatible bug fixes. 7 | 8 | The current stable release at the time of this writing is 6.2.0. The API has been mostly stable for a while, however some parts of it need a bit of polishing (documentation, parameter order etc.). Except for the `Settings` class, which so far remains monolithic, everything has been broken up in to self-contained smaller classes. 9 | 10 | Partitioning the `Settings` class to be more modular is conceptually difficult, but has to happen sooner or later. When it's done, there will be a 7.0.0 release. All development towards that end happens in the `7.0-dev` branch. 11 | 12 | **Last updated:** 2018-08-26 13 | -------------------------------------------------------------------------------- /_typos.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | extend-exclude = [ 3 | ".git/", 4 | "src/lang/*.json", 5 | "src/diacritics/*.json", 6 | "tests/", 7 | "CHANGELOG.md", 8 | "README.md" 9 | ] 10 | ignore-hidden = false 11 | 12 | [default] 13 | extend-ignore-re = [ 14 | # spellchecker:disable-line: 15 | "(?Rm)^.*#\\s*spellchecker:disable-line(\\b.*)?$", 16 | 17 | # spellchecker:: 18 | "#\\s*spellchecker:off\\s*\\n.*\\n\\s*#\\s*spellchecker:on" 19 | ] 20 | 21 | [type.php] 22 | extend-ignore-re = [ 23 | # spellchecker:disable-line: 24 | "(?Rm)^.*//\\s*@spellchecker:disable-line(\\b.*)?$", 25 | 26 | # spellchecker:: 27 | "//\\s*@spellchecker:off\\s*\\n.*\\n\\s*//\\s*@spellchecker:on" 28 | ] 29 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mundschenk-at/php-typography", 3 | "description": "A PHP library for improving your web typography", 4 | "type": "library", 5 | "license": "GPL-2.0-or-later", 6 | "authors": [ 7 | { 8 | "name": "Peter Putzer", 9 | "email": "github@mundschenk.at", 10 | "homepage": "https://code.mundschenk.at", 11 | "role": "Developer" 12 | }, 13 | { 14 | "name": "Jeffrey D. King", 15 | "email": "jeff.king@weathersource.com", 16 | "homepage": "http://kingdesk.com", 17 | "role": "Original author" 18 | } 19 | ], 20 | 21 | "require": { 22 | "php": ">=7.4.0", 23 | "ext-pcre": "*", 24 | "ext-mbstring": "*", 25 | "masterminds/html5": "^2.5.0" 26 | }, 27 | "require-dev": { 28 | "ext-curl": "*", 29 | "phpunit/phpunit": "9.*|10.*", 30 | "brain/monkey": "^2.2.0", 31 | "squizlabs/php_codesniffer": "^3", 32 | "wp-coding-standards/wpcs": "^3", 33 | "phpcompatibility/php-compatibility": "^9.0", 34 | "dealerdirect/phpcodesniffer-composer-installer": "^1.0", 35 | "phpstan/phpstan": "^1.9", 36 | "phpbench/phpbench": "^0.17||^1.0@dev", 37 | "mikey179/vfsstream": "~1", 38 | "mundschenk-at/phpunit-cross-version": "dev-master", 39 | "phpstan/phpstan-mockery": "^1.1", 40 | "phpstan/extension-installer": "^1.2" 41 | }, 42 | 43 | "autoload": { 44 | "classmap": ["src/"] 45 | }, 46 | "autoload-dev": { 47 | "classmap": ["tests/"] 48 | }, 49 | 50 | "bin": [ 51 | "src/bin/update-patterns.php", 52 | "src/bin/update-iana.php" 53 | ], 54 | 55 | "scripts": { 56 | "post-update-cmd": [ 57 | "@update-iana" 58 | ], 59 | "post-install-cmd": [ 60 | "@update-iana" 61 | ], 62 | "update-iana": [ 63 | "php src/bin/update-iana.php" 64 | ], 65 | "update-patterns": [ 66 | "php src/bin/update-patterns.php" 67 | ], 68 | "test": [ 69 | "phpunit --testsuite PHP-Typography" 70 | ], 71 | "coverage": [ 72 | "XDEBUG_MODE=coverage phpunit --testsuite PHP-Typography --coverage-html tests/coverage" 73 | ], 74 | "check": [ 75 | "@phpcs", 76 | "@phpstan" 77 | ], 78 | "phpcs": [ 79 | "phpcs -p -s src/ tests/ --ignore=tests/benchmarks,tests/coverage --extensions=php" 80 | ], 81 | "phpstan": [ 82 | "phpstan analyze --memory-limit 1G" 83 | ], 84 | "phpstan-clean": [ 85 | "phpstan clear-result-cache && phpstan analyze --memory-limit 1G" 86 | ] 87 | }, 88 | "config": { 89 | "allow-plugins": { 90 | "dealerdirect/phpcodesniffer-composer-installer": true, 91 | "phpstan/extension-installer": true 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /phpbench.json: -------------------------------------------------------------------------------- 1 | { 2 | "runner.bootstrap": "vendor/autoload.php", 3 | "runner.path": "tests/benchmarks" 4 | } 5 | -------------------------------------------------------------------------------- /phpcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | A custom set of code standard rules to check for the PHP-Tyography library (based on WordPress coding standards): 5 | 6 | * See https://github.com/squizlabs/PHP_CodeSniffer/wiki/Annotated-ruleset.xml 7 | * See https://github.com/WordPress-Coding-Standards/WordPress-Coding-Standards/blob/develop/WordPress-Core/ruleset.xml 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | */tests/*\.php 47 | 48 | 49 | */tests/*\.php 50 | 51 | 52 | 53 | */tests/*\.php 54 | 55 | 56 | 57 | 58 | */tests/*\.php 59 | 60 | 61 | -------------------------------------------------------------------------------- /phpstan.neon: -------------------------------------------------------------------------------- 1 | includes: 2 | # @see https://github.com/phpstan/phpstan-src/blob/master/conf/bleedingEdge.neon 3 | - phar://phpstan.phar/conf/bleedingEdge.neon 4 | 5 | parameters: 6 | level: 8 7 | inferPrivatePropertyTypeFromConstructor: true 8 | treatPhpDocTypesAsCertain: false 9 | reportUnmatchedIgnoredErrors: false 10 | 11 | paths: 12 | - src/ 13 | -------------------------------------------------------------------------------- /phpunit.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | src 6 | 7 | 8 | src/bin 9 | src/IANA 10 | src/lang 11 | src/diacritics 12 | 13 | 14 | 15 | 16 | tests 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /sonar-project.properties: -------------------------------------------------------------------------------- 1 | sonar.projectKey=mundschenk-at_php-typography 2 | sonar.organization=mundschenk-at 3 | 4 | # This is the name and version displayed in the SonarCloud UI. 5 | sonar.projectName=php-typography 6 | #sonar.projectVersion=1.0 7 | 8 | # Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows. 9 | sonar.sources=src 10 | sonar.tests=tests 11 | sonar.test.exclusions=tests/phpstan/*.php,tests/benchmarks/*.php 12 | sonar.php.coverage.reportPaths=build/logs/phpunit.coverage.xml 13 | sonar.php.tests.reportPath=build/logs/phpunit.test-report.xml 14 | 15 | # Encoding of the source code. Default is default system encoding 16 | sonar.sourceEncoding=UTF-8 17 | -------------------------------------------------------------------------------- /src/bin/class-file-operations.php: -------------------------------------------------------------------------------- 1 | 25 | * @license http://www.gnu.org/licenses/gpl-2.0.html 26 | */ 27 | 28 | namespace PHP_Typography\Bin; 29 | 30 | /** 31 | * Encapsulate some common file operations (including on remote files). 32 | * 33 | * @author Peter Putzer 34 | * 35 | * @since 5.0.0 36 | */ 37 | abstract class File_Operations { 38 | 39 | /** 40 | * Retrieve a HTTP response code via cURL. 41 | * 42 | * @param string $url Required. 43 | * 44 | * @return int 45 | */ 46 | public static function get_http_response_code( $url ) { 47 | 48 | $curl = curl_init(); 49 | curl_setopt_array( 50 | $curl, 51 | [ 52 | CURLOPT_RETURNTRANSFER => true, 53 | CURLOPT_URL => $url, 54 | ] 55 | ); 56 | curl_exec( $curl ); 57 | $response_code = curl_getinfo( $curl, CURLINFO_HTTP_CODE ); 58 | curl_close( $curl ); 59 | 60 | return $response_code; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/bin/update-iana.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | 26 | * @license http://www.gnu.org/licenses/gpl-2.0.html 27 | */ 28 | 29 | namespace PHP_Typography\Bin; 30 | 31 | /** 32 | * Autoload parser classes 33 | */ 34 | $autoload = dirname( dirname( __DIR__ ) ) . '/vendor/autoload.php'; 35 | if ( file_exists( $autoload ) ) { 36 | require_once $autoload; 37 | } else { 38 | // We are a dependency of another project. 39 | require_once dirname( dirname( dirname( dirname( __DIR__ ) ) ) ) . '/autoload.php'; 40 | } 41 | 42 | $source_file = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'; 43 | $target_file = dirname( __DIR__ ) . '/IANA/tlds-alpha-by-domain.txt'; 44 | 45 | if ( ! file_exists( $source_file ) && 404 === File_Operations::get_http_response_code( $source_file ) ) { 46 | echo "Error: unknown TLD file '{$source_file}'\n"; 47 | die( -3 ); 48 | } 49 | 50 | try { 51 | echo 'Trying to update IANA top-level domain list ...'; 52 | $domain_list = file_get_contents( $source_file ); 53 | 54 | if ( ! is_string( $domain_list ) ) { 55 | echo " error retrieving TLD file '{$source_file}'\n"; 56 | die( -3 ); 57 | } 58 | 59 | // Ensure directory exists. 60 | if ( ! is_dir( dirname( $target_file ) ) ) { 61 | mkdir( dirname( $target_file ), 0755, true ); 62 | } 63 | 64 | $file = new \SplFileObject( $target_file, 'w' ); 65 | if ( 0 === $file->fwrite( $domain_list ) ) { 66 | echo " error writing file\n"; 67 | } else { 68 | echo " done\n"; 69 | } 70 | } catch ( \Exception $e ) { 71 | echo " error\n"; 72 | } 73 | -------------------------------------------------------------------------------- /src/bin/update-patterns.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | 26 | * @license http://www.gnu.org/licenses/gpl-2.0.html 27 | */ 28 | 29 | namespace PHP_Typography\Bin; 30 | 31 | /** 32 | * Autoload parser classes 33 | */ 34 | $autoload = dirname( dirname( __DIR__ ) ) . '/vendor/autoload.php'; 35 | if ( file_exists( $autoload ) ) { 36 | require_once $autoload; 37 | } else { 38 | // We are a dependency of another project. 39 | require_once dirname( dirname( dirname( dirname( __DIR__ ) ) ) ) . '/autoload.php'; 40 | } 41 | 42 | $target_directory = dirname( __DIR__ ) . '/lang'; 43 | $pattern_files = file_get_contents( __DIR__ . '/patterns.json' ); 44 | 45 | if ( ! is_string( $pattern_files ) ) { 46 | echo "Error: Could not read '" . __DIR__ . "/patterns.json'\n"; 47 | die( -3 ); 48 | } 49 | 50 | $patterns_list = json_decode( $pattern_files, true ); 51 | 52 | foreach ( $patterns_list['list'] as $pattern ) { 53 | $language = $pattern['name']; 54 | $url = $pattern['url']; 55 | $filename = $pattern['short'] . '.json'; 56 | 57 | $converter = new Pattern_Converter( $url, $language ); 58 | 59 | echo "Parsing $language TeX file and converting it to lang/$filename ..."; 60 | 61 | try { 62 | $json_pattern = $converter->convert(); 63 | file_put_contents( $target_directory . '/' . $filename, $json_pattern ); 64 | echo " done\n"; 65 | } catch ( \Exception $e ) { 66 | echo " error, skipping\n"; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/class-strings.php: -------------------------------------------------------------------------------- 1 | [ 81 | 'strlen' => 'mb_strlen', 82 | 'str_split' => 'mb_str_split', 83 | 'strtolower' => 'mb_strtolower', 84 | 'strtoupper' => 'mb_strtoupper', 85 | 'substr' => 'mb_substr', 86 | 'u' => 'u', 87 | ], 88 | 'ASCII' => [ 89 | 'strlen' => 'strlen', 90 | 'str_split' => 'str_split', 91 | 'strtolower' => 'strtolower', 92 | 'strtoupper' => 'strtoupper', 93 | 'substr' => 'substr', 94 | 'u' => '', 95 | ], 96 | ]; 97 | 98 | /** 99 | * Retrieves str* functions. 100 | * 101 | * @param string $str A string to detect the encoding from. 102 | * 103 | * @return array 104 | * 105 | * @throws Invalid_Encoding_Exception Throws an exception if the string is not encoded in ASCII or UTF-8. 106 | * 107 | * @phpstan-return String_Functions 108 | */ 109 | public static function functions( $str ) { 110 | foreach ( self::ENCODINGS as $encoding ) { 111 | if ( \mb_check_encoding( $str, $encoding ) ) { 112 | return self::STRING_FUNCTIONS[ $encoding ]; 113 | } 114 | } 115 | 116 | throw new Invalid_Encoding_Exception( "String '$str' uses neither ASCII nor UTF-8 encoding." ); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/class-u.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 5.0.0 35 | */ 36 | interface U { 37 | 38 | const NO_BREAK_SPACE = "\xc2\xa0"; 39 | const NO_BREAK_NARROW_SPACE = "\xe2\x80\xaf"; 40 | const COPYRIGHT = "\xc2\xa9"; 41 | const GUILLEMET_OPEN = "\xc2\xab"; 42 | const SOFT_HYPHEN = "\xc2\xad"; 43 | const REGISTERED_MARK = "\xc2\xae"; 44 | const GUILLEMET_CLOSE = "\xc2\xbb"; 45 | const MULTIPLICATION = "\xc3\x97"; 46 | const DIVISION = "\xc3\xb7"; 47 | const FIGURE_SPACE = "\xe2\x80\x87"; 48 | const THIN_SPACE = "\xe2\x80\x89"; 49 | const HAIR_SPACE = "\xe2\x80\x8a"; 50 | const ZERO_WIDTH_SPACE = "\xe2\x80\x8b"; 51 | const HYPHEN_MINUS = '-'; 52 | const HYPHEN = "\xe2\x80\x90"; 53 | const NO_BREAK_HYPHEN = "\xe2\x80\x91"; 54 | const EN_DASH = "\xe2\x80\x93"; 55 | const EM_DASH = "\xe2\x80\x94"; 56 | const SINGLE_QUOTE_OPEN = "\xe2\x80\x98"; 57 | const SINGLE_QUOTE_CLOSE = "\xe2\x80\x99"; 58 | const APOSTROPHE = "\xca\xbc"; // This is the "MODIFIER LETTER APOSTROPHE". 59 | const SINGLE_LOW_9_QUOTE = "\xe2\x80\x9a"; 60 | const DOUBLE_QUOTE_OPEN = "\xe2\x80\x9c"; 61 | const DOUBLE_QUOTE_CLOSE = "\xe2\x80\x9d"; 62 | const DOUBLE_LOW_9_QUOTE = "\xe2\x80\x9e"; 63 | const ELLIPSIS = "\xe2\x80\xa6"; 64 | const SINGLE_PRIME = "\xe2\x80\xb2"; 65 | const DOUBLE_PRIME = "\xe2\x80\xb3"; 66 | const SINGLE_ANGLE_QUOTE_OPEN = "\xe2\x80\xb9"; 67 | const SINGLE_ANGLE_QUOTE_CLOSE = "\xe2\x80\xba"; 68 | const FRACTION_SLASH = "\xe2\x81\x84"; 69 | const SOUND_COPY_MARK = "\xe2\x84\x97"; 70 | const SERVICE_MARK = "\xe2\x84\xa0"; 71 | const TRADE_MARK = "\xe2\x84\xa2"; 72 | const MINUS = "\xe2\x88\x92"; 73 | const LEFT_CORNER_BRACKET = "\xe3\x80\x8c"; 74 | const RIGHT_CORNER_BRACKET = "\xe3\x80\x8d"; 75 | const LEFT_WHITE_CORNER_BRACKET = "\xe3\x80\x8e"; 76 | const RIGHT_WHITE_CORNER_BRACKET = "\xe3\x80\x8f"; 77 | const ZERO_WIDTH_JOINER = "\u{200c}"; 78 | const ZERO_WIDTH_NON_JOINER = "\u{200d}"; 79 | } 80 | -------------------------------------------------------------------------------- /src/exceptions/class-invalid-encoding-exception.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 7.0.0 35 | */ 36 | class Invalid_Encoding_Exception extends \UnexpectedValueException { 37 | } 38 | -------------------------------------------------------------------------------- /src/exceptions/class-invalid-file-exception.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 7.0.0 35 | */ 36 | class Invalid_File_Exception extends \UnexpectedValueException { 37 | } 38 | -------------------------------------------------------------------------------- /src/exceptions/class-invalid-hyphenation-pattern-file-exception.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 7.0.0 35 | */ 36 | class Invalid_Hyphenation_Pattern_File_Exception extends \RuntimeException { 37 | } 38 | -------------------------------------------------------------------------------- /src/exceptions/class-invalid-json-exception.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 7.0.0 35 | */ 36 | class Invalid_JSON_Exception extends \UnexpectedValueException { 37 | } 38 | -------------------------------------------------------------------------------- /src/exceptions/class-invalid-path-exception.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 7.0.0 35 | */ 36 | class Invalid_Path_Exception extends \RuntimeException { 37 | } 38 | -------------------------------------------------------------------------------- /src/exceptions/class-invalid-style-exception.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 7.0.0 35 | */ 36 | class Invalid_Style_Exception extends \DomainException { 37 | } 38 | -------------------------------------------------------------------------------- /src/fixes/class-node-fix.php: -------------------------------------------------------------------------------- 1 | 35 | * 36 | * @since 5.0.0 37 | */ 38 | interface Node_Fix { 39 | 40 | /** 41 | * Apply the fix to a given textnode. 42 | * 43 | * @since 7.0.0 All parameters are now required. 44 | * 45 | * @param \DOMText $textnode The DOM node. 46 | * @param Settings $settings The settings to apply. 47 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 48 | * 49 | * @return void 50 | */ 51 | public function apply( \DOMText $textnode, Settings $settings, $is_title ); 52 | 53 | /** 54 | * Determines whether the fix should be applied to (RSS) feeds. 55 | * 56 | * @return bool 57 | */ 58 | public function feed_compatible(); 59 | } 60 | -------------------------------------------------------------------------------- /src/fixes/class-token-fix.php: -------------------------------------------------------------------------------- 1 | 36 | * 37 | * @since 5.0.0 38 | */ 39 | interface Token_Fix { 40 | 41 | const MIXED_WORDS = 1; 42 | const COMPOUND_WORDS = 2; 43 | const WORDS = 3; 44 | const OTHER = 4; 45 | 46 | /** 47 | * Apply the fix to a given set of tokens 48 | * 49 | * @since 7.0.0 The parameter order has been re-arranged to mirror Node_Fix. 50 | * 51 | * @param Token[] $tokens The set of tokens. 52 | * @param \DOMText $textnode The context DOM node. 53 | * @param Settings $settings The settings to apply. 54 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 55 | * 56 | * @return Token[] The fixed set of tokens. 57 | */ 58 | public function apply( array $tokens, \DOMText $textnode, Settings $settings, $is_title ); 59 | 60 | /** 61 | * Determines whether the fix should be applied to (RSS) feeds. 62 | * 63 | * @return bool 64 | */ 65 | public function feed_compatible(); 66 | 67 | /** 68 | * Retrieves the target token array for this fix. 69 | * 70 | * @return int 71 | */ 72 | public function target(); 73 | } 74 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-abstract-node-fix.php: -------------------------------------------------------------------------------- 1 | 36 | * 37 | * @since 5.0.0 38 | */ 39 | abstract class Abstract_Node_Fix implements Node_Fix { 40 | 41 | /** 42 | * Is this fix compatible with feeds? 43 | * 44 | * @var bool 45 | */ 46 | private $feed_compatible; 47 | 48 | /** 49 | * Creates a new fix instance. 50 | * 51 | * @param bool $feed_compatible Optional. Default false. 52 | */ 53 | public function __construct( $feed_compatible = false ) { 54 | $this->feed_compatible = $feed_compatible; 55 | } 56 | 57 | /** 58 | * Apply the fix to a given textnode. 59 | * 60 | * @since 7.0.0 All parameters are now required. 61 | * 62 | * @param \DOMText $textnode The DOM node. 63 | * @param Settings $settings The settings to apply. 64 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 65 | * 66 | * @return void 67 | */ 68 | abstract public function apply( \DOMText $textnode, Settings $settings, $is_title ); 69 | 70 | /** 71 | * Determines whether the fix should be applied to (RSS) feeds. 72 | * 73 | * @return bool 74 | */ 75 | public function feed_compatible() { 76 | return $this->feed_compatible; 77 | } 78 | 79 | /** 80 | * Remove adjacent characters from given string. 81 | * 82 | * @since 4.2.2 83 | * @since 5.1.3 $prev_char and $next_char replaced with $prev_length and $next_length 84 | * to support multi-characters replacements. 85 | * @since 6.0.0 New required parameters for strlen() and substr() added. 86 | * 87 | * @param string $string The string. 88 | * @param callable $strlen A strlen()-type function. 89 | * @param callable $substr A substr()-type function. 90 | * @param int $prev_length Optional. Default 0. The number of characters to remove at the beginning. 91 | * @param int $next_length Optional. Default 0. The number of characters to remove at the end. 92 | * 93 | * @return string The string without the characters from adjacent nodes. 94 | */ 95 | protected static function remove_adjacent_characters( $string, callable $strlen, callable $substr, $prev_length = 0, $next_length = 0 ) { 96 | // Remove previous character. 97 | if ( $prev_length > 0 ) { 98 | $string = $substr( $string, $prev_length, $strlen( $string ) ); 99 | } 100 | 101 | // Remove next character. 102 | if ( $next_length > 0 ) { 103 | $string = $substr( $string, 0, $strlen( $string ) - $next_length ); 104 | } 105 | 106 | return $string; 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-classes-dependent-fix.php: -------------------------------------------------------------------------------- 1 | 36 | * 37 | * @since 5.0.0 38 | */ 39 | abstract class Classes_Dependent_Fix extends Abstract_Node_Fix { 40 | 41 | /** 42 | * An array of HTML classes to avoid applying the fix. 43 | * 44 | * @var string[] 45 | */ 46 | private $classes_to_avoid; 47 | 48 | /** 49 | * Creates a new classes dependent fix. 50 | * 51 | * @param string[]|string $classes HTML class(es). 52 | * @param bool $feed_compatible Optional. Default false. 53 | */ 54 | public function __construct( $classes, $feed_compatible = false ) { 55 | parent::__construct( $feed_compatible ); 56 | 57 | if ( ! is_array( $classes ) ) { 58 | $classes = [ $classes ]; 59 | } 60 | 61 | $this->classes_to_avoid = $classes; 62 | } 63 | 64 | /** 65 | * Apply the fix to a given textnode if the nodes class(es) allow it. 66 | * 67 | * @since 7.0.0 All parameters are now required. 68 | * 69 | * @param \DOMText $textnode The DOM node. 70 | * @param Settings $settings The settings to apply. 71 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 72 | * 73 | * @return void 74 | */ 75 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 76 | if ( ! DOM::has_class( $textnode, $this->classes_to_avoid ) ) { 77 | $this->apply_internal( $textnode, $settings, $is_title ); 78 | } 79 | } 80 | 81 | /** 82 | * Apply the fix to a given textnode. 83 | * 84 | * @since 6.0.0 The method was accidentally made public and is now protected. 85 | * @since 7.0.0 All parameters are now required. 86 | * 87 | * @param \DOMText $textnode The DOM node. 88 | * @param Settings $settings The settings to apply. 89 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 90 | * 91 | * @return void 92 | */ 93 | abstract protected function apply_internal( \DOMText $textnode, Settings $settings, $is_title ); 94 | } 95 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-french-punctuation-spacing-fix.php: -------------------------------------------------------------------------------- 1 | 44 | * 45 | * @since 5.0.0 46 | */ 47 | class French_Punctuation_Spacing_Fix extends Abstract_Node_Fix { 48 | // Regular expressions with mandatary Unicode modifier. 49 | const INSERT_NARROW_SPACE = '/(\w+(?:\s?»)?)(\s?)([?!;])(\s|\Z)/u'; 50 | const INSERT_FULL_SPACE = '/(\w+(?:\s?»)?)(\s?)(:)(\s|\Z)/u'; 51 | const INSERT_SPACE_AFTER_OPENING_QUOTE = '/(\s|\A|[\(\[])(«)(\s?)(\w+)/u'; 52 | const INSERT_SPACE_BEFORE_CLOSING_QUOTE = '/(\w+[.?!]?)(\s?)(»)(\s|[.,?!:\)\]]|\Z)/u'; 53 | 54 | /** 55 | * Apply the fix to a given textnode. 56 | * 57 | * @since 7.0.0 All parameters are now required. 58 | * 59 | * @param \DOMText $textnode The DOM node. 60 | * @param Settings $settings The settings to apply. 61 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 62 | * 63 | * @return void 64 | */ 65 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 66 | if ( empty( $settings->french_punctuation_spacing ) ) { 67 | return; 68 | } 69 | 70 | // Need to get context of adjacent characters outside adjacent inline tags or HTML comment 71 | // if we have adjacent characters add them to the text. 72 | $previous_character = DOM::get_previous_character( $textnode ); 73 | $next_character = DOM::get_next_character( $textnode ); 74 | $node_data = "{$previous_character}{$textnode->data}"; // $next_character is not included on purpose. 75 | 76 | // Check encoding. 77 | $f = Strings::functions( "{$node_data}{$next_character}" ); // Include $next_character for determining encodiing. 78 | 79 | $node_data = (string) \preg_replace( 80 | [ 81 | self::INSERT_SPACE_BEFORE_CLOSING_QUOTE, 82 | self::INSERT_NARROW_SPACE, 83 | self::INSERT_FULL_SPACE, 84 | ], 85 | [ 86 | '$1' . U::NO_BREAK_NARROW_SPACE . '$3$4', 87 | '$1' . U::NO_BREAK_NARROW_SPACE . '$3$4', 88 | '$1' . U::NO_BREAK_SPACE . '$3$4', 89 | ], 90 | $node_data 91 | ); 92 | 93 | // The next rule depends on the following characters as well. 94 | $node_data = (string) \preg_replace( self::INSERT_SPACE_AFTER_OPENING_QUOTE, '$1$2' . U::NO_BREAK_NARROW_SPACE . '$4', "{$node_data}{$next_character}" ); 95 | 96 | // If we have adjacent characters remove them from the text. 97 | $textnode->data = self::remove_adjacent_characters( $node_data, $f['strlen'], $f['substr'], $f['strlen']( $previous_character ), $f['strlen']( $next_character ) ); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-numbered-abbreviation-spacing-fix.php: -------------------------------------------------------------------------------- 1 | 39 | * 40 | * @since 5.0.0 41 | */ 42 | class Numbered_Abbreviation_Spacing_Fix extends Simple_Regex_Replacement_Fix { 43 | private const ISO = 'ISO(?:\/(?:IEC|TR|TS))?'; 44 | private const ABBREVIATIONS = ' 45 | ### International standards 46 | ' . self::ISO . '| 47 | 48 | ### German standards 49 | DIN| 50 | DIN[ ]EN(?:[ ]' . self::ISO . ')?| 51 | DIN[ ]EN[ ]ISP 52 | DIN[ ]' . self::ISO . '| 53 | DIN[ ]IEC| 54 | DIN[ ]CEN\/TS| 55 | DIN[ ]CLC\/TS| 56 | DIN[ ]CWA| 57 | DIN[ ]VDE| 58 | 59 | LN|VG|VDE|VDI 60 | 61 | ### Austrian standards 62 | ÖNORM| 63 | ÖNORM[ ](?:A|B|C|E|F|G|H|K|L|M|N|O|S|V|Z)| 64 | ÖNORM[ ]EN(?:[ ]' . self::ISO . ')?| 65 | ÖNORM[ ]ETS| 66 | 67 | ÖVE|ONR| 68 | 69 | ### Food additives 70 | E 71 | '; // required modifiers: x (multiline pattern). 72 | 73 | const REPLACEMENT = '$1' . U::NO_BREAK_SPACE . '$2'; 74 | const REGEX = '/\b(' . self::ABBREVIATIONS . ')[' . RE::NORMAL_SPACES . ']+([0-9]+)/xu'; 75 | 76 | /** 77 | * Creates a new fix object. 78 | * 79 | * @param bool $feed_compatible Optional. Default false. 80 | */ 81 | public function __construct( $feed_compatible = false ) { 82 | parent::__construct( self::REGEX, self::REPLACEMENT, Settings::NUMBERED_ABBREVIATION_SPACING, $feed_compatible ); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-simple-regex-replacement-fix.php: -------------------------------------------------------------------------------- 1 | 35 | * 36 | * @since 5.0.0 37 | */ 38 | abstract class Simple_Regex_Replacement_Fix extends Abstract_Node_Fix { 39 | 40 | /** 41 | * The setting string used to enable/disable the fix (e.g. 'styleAmpersands'). 42 | * 43 | * @var string 44 | */ 45 | protected $settings_switch; 46 | 47 | /** 48 | * The regular expressions used to match the text that should be wrapped in spans. 49 | * 50 | * It must contain a single matching expression. 51 | * 52 | * @var string 53 | */ 54 | protected $regex; 55 | 56 | /** 57 | * The replacement expression. 58 | * 59 | * @var string 60 | */ 61 | protected $replacement; 62 | 63 | /** 64 | * Creates a new node fix with a class. 65 | * 66 | * @param string $regex Regular expression to match the text. 67 | * @param string $replacement A replacement expression. 68 | * @param string $settings_switch On/off switch for fix. 69 | * @param bool $feed_compatible Optional. Default false. 70 | */ 71 | public function __construct( $regex, $replacement, $settings_switch, $feed_compatible = false ) { 72 | parent::__construct( $feed_compatible ); 73 | 74 | $this->regex = $regex . 'S'; // Add "Study" modifier. 75 | $this->settings_switch = $settings_switch; 76 | $this->replacement = $replacement; 77 | } 78 | 79 | /** 80 | * Apply the fix to a given textnode. 81 | * 82 | * @since 7.0.0 All parameters are now required. 83 | * 84 | * @param \DOMText $textnode The DOM node. 85 | * @param Settings $settings The settings to apply. 86 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 87 | * 88 | * @return void 89 | */ 90 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 91 | if ( empty( $settings->{ $this->settings_switch } ) ) { 92 | return; 93 | } 94 | 95 | $textnode->data = (string) \preg_replace( $this->regex, $this->replacement, $textnode->data ); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-simple-style-fix.php: -------------------------------------------------------------------------------- 1 | 36 | * 37 | * @since 5.0.0 38 | */ 39 | abstract class Simple_Style_Fix extends Classes_Dependent_Fix { 40 | 41 | /** 42 | * The setting string used to enable/disable the fix (e.g. 'styleAmpersands'). 43 | 44 | * @var string 45 | */ 46 | protected $settings_switch; 47 | 48 | /** 49 | * The regular expressions used to match the text that should be wrapped in spans. 50 | * 51 | * It must contain a single matching expression. 52 | * 53 | * @var string 54 | */ 55 | protected $regex; 56 | 57 | /** 58 | * The css class name to include in the generated markup. 59 | * 60 | * @var string 61 | */ 62 | protected $css_class; 63 | 64 | /** 65 | * Creates a new node fix with a class. 66 | * 67 | * @param string $regex Regular expression to match the text. 68 | * @param string $settings_switch On/off switch for fix. 69 | * @param string $css_class HTML class used in markup. 70 | * @param bool $feed_compatible Optional. Default false. 71 | */ 72 | public function __construct( $regex, $settings_switch, $css_class, $feed_compatible = false ) { 73 | parent::__construct( $css_class, $feed_compatible ); 74 | 75 | $this->regex = $regex; 76 | $this->settings_switch = $settings_switch; 77 | $this->css_class = $css_class; 78 | } 79 | 80 | /** 81 | * Apply the fix to a given textnode. 82 | * 83 | * @since 6.0.0 The method was accidentally made public and is now protected. 84 | * @since 7.0.0 All parameters are now required. 85 | * 86 | * @param \DOMText $textnode The DOM node. 87 | * @param Settings $settings The settings to apply. 88 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 89 | * 90 | * @return void 91 | */ 92 | protected function apply_internal( \DOMText $textnode, Settings $settings, $is_title ) { 93 | if ( empty( $settings->{ $this->settings_switch } ) ) { 94 | return; 95 | } 96 | 97 | $textnode->data = (string) \preg_replace( $this->regex, RE::escape_tags( "css_class}\">\$1" ), $textnode->data ); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-single-character-word-spacing-fix.php: -------------------------------------------------------------------------------- 1 | 39 | * 40 | * @since 5.0.0 41 | * @since 6.0.0 The replacement now assumes decoded ampersands (i.e. plain "&" instead of "&"). 42 | */ 43 | class Single_Character_Word_Spacing_Fix extends Abstract_Node_Fix { 44 | 45 | const REGEX = '/ 46 | (?: 47 | (\s) 48 | (\w|&) 49 | [' . RE::NORMAL_SPACES . '] 50 | (?=\w) 51 | ) 52 | /x'; 53 | 54 | /** 55 | * Apply the fix to a given textnode. 56 | * 57 | * @since 7.0.0 All parameters are now required. 58 | * 59 | * @param \DOMText $textnode The DOM node. 60 | * @param Settings $settings The settings to apply. 61 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 62 | * 63 | * @return void 64 | */ 65 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 66 | if ( empty( $settings->single_character_word_spacing ) ) { 67 | return; 68 | } 69 | 70 | // Add $next_character and $previous_character for context. 71 | $previous_character = DOM::get_previous_character( $textnode ); 72 | $next_character = DOM::get_next_character( $textnode ); 73 | $node_data = "{$previous_character}{$textnode->data}{$next_character}"; 74 | 75 | // Check encoding. 76 | $f = Strings::functions( $node_data ); 77 | 78 | // Replace spaces. 79 | $node_data = (string) \preg_replace( self::REGEX . $f['u'], '$1$2' . U::NO_BREAK_SPACE, $node_data ); 80 | 81 | // If we have adjacent characters remove them from the text. 82 | $textnode->data = self::remove_adjacent_characters( $node_data, $f['strlen'], $f['substr'], $f['strlen']( $previous_character ), $f['strlen']( $next_character ) ); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-smart-area-units-fix.php: -------------------------------------------------------------------------------- 1 | 35 | * 36 | * @since 5.0.0 37 | */ 38 | class Smart_Area_Units_Fix extends Abstract_Node_Fix { 39 | 40 | const LENGTH_UNITS = '(?:p|µ|[mcdhkMGT])?m'; // Just metric for now. 41 | const NUMBER = '[0-9]+(?:\.,)?[0-9]*'; 42 | const WHITESPACE = '\s*'; 43 | 44 | const AREA_UNITS = '/\b(' . self::NUMBER . ')(' . self::WHITESPACE . ')(' . self::LENGTH_UNITS . ')2\b/Su'; 45 | const VOLUME_UNITS = '/\b(' . self::NUMBER . ')(' . self::WHITESPACE . ')(' . self::LENGTH_UNITS . ')3\b/Su'; 46 | 47 | /** 48 | * Apply the fix to a given textnode. 49 | * 50 | * @since 7.0.0 All parameters are now required. 51 | * 52 | * @param \DOMText $textnode The DOM node. 53 | * @param Settings $settings The settings to apply. 54 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 55 | * 56 | * @return void 57 | */ 58 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 59 | if ( empty( $settings->smart_area_units ) ) { 60 | return; 61 | } 62 | 63 | $textnode->data = (string) \preg_replace( 64 | [ self::AREA_UNITS, self::VOLUME_UNITS ], 65 | [ '$1 $3²', '$1 $3³' ], 66 | $textnode->data 67 | ); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-smart-diacritics-fix.php: -------------------------------------------------------------------------------- 1 | 35 | * 36 | * @since 5.0.0 37 | */ 38 | class Smart_Diacritics_Fix extends Abstract_Node_Fix { 39 | 40 | /** 41 | * Apply the fix to a given textnode. 42 | * 43 | * @since 7.0.0 All parameters are now required. 44 | * 45 | * @param \DOMText $textnode The DOM node. 46 | * @param Settings $settings The settings to apply. 47 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 48 | * 49 | * @return void 50 | */ 51 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 52 | if ( empty( $settings->smart_diacritics ) || empty( $settings->diacritic_combined ) ) { 53 | return; // abort. 54 | } 55 | 56 | // FIXME: Add proper initialization and move condition upwards. 57 | if ( 58 | ! empty( $settings->diacritic_combined['patterns'] ) && 59 | ! empty( $settings->diacritic_combined['replacements'] ) 60 | ) { 61 | 62 | // Uses "word" => "replacement" pairs from an array to make fast preg_* replacements. 63 | $replacements = $settings->diacritic_combined['replacements']; 64 | $textnode->data = (string) \preg_replace_callback( 65 | $settings->diacritic_combined['patterns'], 66 | function ( $matching ) use ( $replacements ) { 67 | if ( isset( $replacements[ $matching[0] ] ) ) { 68 | return $replacements[ $matching[0] ]; 69 | } else { 70 | return $matching[0]; 71 | } 72 | }, 73 | $textnode->data 74 | ); 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-smart-ellipses-fix.php: -------------------------------------------------------------------------------- 1 | 36 | * 37 | * @since 5.0.0 38 | */ 39 | class Smart_Ellipses_Fix extends Abstract_Node_Fix { 40 | 41 | /** 42 | * Apply the fix to a given textnode. 43 | * 44 | * @since 7.0.0 All parameters are now required. 45 | * 46 | * @param \DOMText $textnode The DOM node. 47 | * @param Settings $settings The settings to apply. 48 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 49 | * 50 | * @return void 51 | */ 52 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 53 | if ( empty( $settings->smart_ellipses ) ) { 54 | return; 55 | } 56 | 57 | // Cache textnode content. 58 | $node_data = $textnode->data; 59 | 60 | $node_data = \str_replace( [ '....', '. . . .' ], '.' . U::ELLIPSIS, $node_data ); 61 | $node_data = \str_replace( [ '...', '. . .' ], U::ELLIPSIS, $node_data ); 62 | 63 | // Restore textnode content. 64 | $textnode->data = $node_data; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-smart-exponents-fix.php: -------------------------------------------------------------------------------- 1 | 38 | * 39 | * @since 5.0.0 40 | */ 41 | class Smart_Exponents_Fix extends Simple_Regex_Replacement_Fix { 42 | 43 | /** 44 | * Creates a new fix object. 45 | * 46 | * @param bool $feed_compatible Optional. Default false. 47 | */ 48 | public function __construct( $feed_compatible = false ) { 49 | parent::__construct( '/\b(\d+)\^(\w+)\b/u', RE::escape_tags( '$1$2' ), Settings::SMART_EXPONENTS, $feed_compatible ); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-smart-marks-fix.php: -------------------------------------------------------------------------------- 1 | 37 | * 38 | * @since 5.0.0 39 | */ 40 | class Smart_Marks_Fix extends Abstract_Node_Fix { 41 | 42 | const ESCAPE_501C = '/\b(501\()(c)(\)\((?:[1-9]|[1-2][0-9])\))/S'; 43 | 44 | const MARKS = [ 45 | '(c)' => U::COPYRIGHT, 46 | '(C)' => U::COPYRIGHT, 47 | '(r)' => U::REGISTERED_MARK, 48 | '(R)' => U::REGISTERED_MARK, 49 | '(p)' => U::SOUND_COPY_MARK, 50 | '(P)' => U::SOUND_COPY_MARK, 51 | '(sm)' => U::SERVICE_MARK, 52 | '(SM)' => U::SERVICE_MARK, 53 | '(tm)' => U::TRADE_MARK, 54 | '(TM)' => U::TRADE_MARK, 55 | ]; 56 | 57 | /** 58 | * An array of marks to match. 59 | * 60 | * @since 6.0.0 61 | * 62 | * @var string[] 63 | */ 64 | private $marks; 65 | 66 | /** 67 | * An array of replacement marks. 68 | * 69 | * @since 6.0.0 70 | * 71 | * @var string[] 72 | */ 73 | private $replacements; 74 | 75 | /** 76 | * Creates a new fix instance. 77 | * 78 | * @param bool $feed_compatible Optional. Default false. 79 | */ 80 | public function __construct( $feed_compatible = false ) { 81 | parent::__construct( $feed_compatible ); 82 | 83 | $this->marks = \array_keys( self::MARKS ); 84 | $this->replacements = \array_values( self::MARKS ); 85 | } 86 | 87 | /** 88 | * Apply the fix to a given textnode. 89 | * 90 | * @since 7.0.0 All parameters are now required. 91 | * 92 | * @param \DOMText $textnode The DOM node. 93 | * @param Settings $settings The settings to apply. 94 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 95 | * 96 | * @return void 97 | */ 98 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 99 | if ( empty( $settings->smart_marks ) ) { 100 | return; 101 | } 102 | 103 | // Cache textnode content. 104 | $node_data = $textnode->data; 105 | 106 | // Escape usage of "501(c)(1...29)" (US non-profit). 107 | $node_data = (string) \preg_replace( self::ESCAPE_501C, '$1' . RE::ESCAPE_MARKER . '$2' . RE::ESCAPE_MARKER . '$3', $node_data ); 108 | 109 | // Replace marks. 110 | $node_data = \str_replace( $this->marks, $this->replacements, $node_data ); 111 | 112 | // Un-escape escaped sequences & resetore textnode content. 113 | $textnode->data = \str_replace( RE::ESCAPE_MARKER, '', $node_data ); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-space-collapse-fix.php: -------------------------------------------------------------------------------- 1 | 39 | * 40 | * @since 5.0.0 41 | */ 42 | class Space_Collapse_Fix extends Abstract_Node_Fix { 43 | 44 | const COLLAPSE_NORMAL_SPACES = '/[' . RE::NORMAL_SPACES . ']+/Sxu'; 45 | const COLLAPSE_NON_BREAKABLE_SPACES = '/(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')*' . U::NO_BREAK_SPACE . '(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')*/Sxu'; 46 | const COLLAPSE_OTHER_SPACES = '/(?:[' . RE::NORMAL_SPACES . '])*(' . RE::HTML_SPACES . ')(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')*/Sxu'; 47 | const COLLAPSE_SPACES_AT_START_OF_BLOCK = '/\A(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')+/Sxu'; 48 | 49 | /** 50 | * Apply the fix to a given textnode. 51 | * 52 | * @since 7.0.0 All parameters are now required. 53 | * 54 | * @param \DOMText $textnode The DOM node. 55 | * @param Settings $settings The settings to apply. 56 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 57 | * 58 | * @return void 59 | */ 60 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 61 | if ( empty( $settings->space_collapse ) ) { 62 | return; 63 | } 64 | 65 | // Cache textnode content. 66 | $node_data = $textnode->data; 67 | 68 | // Replace spaces. 69 | $node_data = (string) \preg_replace( 70 | [ 71 | // Normal spacing. 72 | self::COLLAPSE_NORMAL_SPACES, 73 | // Non-breakable space get's priority. If non-breakable space exists in a string of spaces, it collapses to a single non-breakable space. 74 | self::COLLAPSE_NON_BREAKABLE_SPACES, 75 | // For any other spaceing, replace with the first occurrence of an unusual space character. 76 | self::COLLAPSE_OTHER_SPACES, 77 | ], 78 | [ // @codeCoverageIgnoreStart 79 | ' ', 80 | U::NO_BREAK_SPACE, 81 | '$1', 82 | ], // @codeCoverageIgnoreEnd 83 | $node_data 84 | ); 85 | 86 | // Remove all spacing at beginning of block level elements. 87 | if ( DOM::get_first_textnode( $textnode ) === $textnode ) { 88 | $node_data = (string) \preg_replace( self::COLLAPSE_SPACES_AT_START_OF_BLOCK, '', $node_data ); 89 | } 90 | 91 | // Restore textnode content. 92 | $textnode->data = $node_data; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-style-ampersands-fix.php: -------------------------------------------------------------------------------- 1 | (i.e. H&J becomes H&J), 33 | * if enabled. 34 | * 35 | * Call after style_caps so H&J becomes H&J. 36 | * 37 | * @author Peter Putzer 38 | * 39 | * @since 5.0.0 40 | * @since 6.0.0 The replacement now assumes decoded ampersands (i.e. plain "&" instead of "&"). 41 | */ 42 | class Style_Ampersands_Fix extends Simple_Style_Fix { 43 | 44 | /** 45 | * Creates a new node fix with a class. 46 | * 47 | * @param string $css_class HTML class used in markup. 48 | * @param bool $feed_compatible Optional. Default false. 49 | */ 50 | public function __construct( $css_class, $feed_compatible = false ) { 51 | parent::__construct( '/(&)/S', Settings::STYLE_AMPERSANDS, $css_class, $feed_compatible ); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-style-caps-fix.php: -------------------------------------------------------------------------------- 1 | if enabled. 34 | * 35 | * Call before style_numbers(). Only call if you are certain that no html tags have 36 | * been injected containing capital letters. 37 | * 38 | * @author Peter Putzer 39 | * 40 | * @since 5.0.0 41 | */ 42 | class Style_Caps_Fix extends Simple_Style_Fix { 43 | 44 | // PCRE needs to be compiled with "--enable-unicode-properties", but we already depend on that elsewhere. 45 | const REGEX = '/ 46 | (? (even numbers that appear inside a word, 33 | * i.e. A9 becomes A9), if enabled. 34 | * 35 | * Call after style_caps so A9 becomes A9. 36 | * Call after smart_fractions and smart_ordinal_suffix. 37 | * Only call if you are certain that no html tags have been injected containing numbers. 38 | * 39 | * @author Peter Putzer 40 | * 41 | * @since 5.0.0 42 | */ 43 | class Style_Numbers_Fix extends Simple_Style_Fix { 44 | 45 | /** 46 | * Creates a new node fix with a class. 47 | * 48 | * @param string $css_class HTML class used in markup. 49 | * @param bool $feed_compatible Optional. Default false. 50 | */ 51 | public function __construct( $css_class, $feed_compatible = false ) { 52 | parent::__construct( '/([0-9]+)/S', Settings::STYLE_NUMBERS, $css_class, $feed_compatible ); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-unicode-remapping-fix.php: -------------------------------------------------------------------------------- 1 | 35 | * 36 | * @since 7.0.0 37 | */ 38 | class Unicode_Remapping_Fix extends Abstract_Node_Fix { 39 | 40 | /** 41 | * Creates a new node fix. 42 | */ 43 | public function __construct() { 44 | parent::__construct( true ); 45 | } 46 | 47 | /** 48 | * Apply the fix to a given textnode. 49 | * 50 | * @param \DOMText $textnode The DOM node. 51 | * @param Settings $settings The settings to apply. 52 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 53 | * 54 | * @return void 55 | */ 56 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 57 | if ( empty( $settings->unicode_character_mapping ) ) { 58 | return; 59 | } 60 | 61 | $textnode->data = \strtr( $textnode->data, $settings->unicode_character_mapping ); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/fixes/node-fixes/class-unit-spacing-fix.php: -------------------------------------------------------------------------------- 1 | 37 | * 38 | * @since 5.0.0 39 | */ 40 | class Unit_Spacing_Fix extends Simple_Regex_Replacement_Fix { 41 | 42 | const REPLACEMENT = '$1' . U::NO_BREAK_NARROW_SPACE . '$2'; 43 | const REGEX = '/(\d\.?)\s(' . self::STANDARD_UNITS . ')' . self::WORD_BOUNDARY . '/Sxu'; 44 | 45 | private const STANDARD_UNITS = ' 46 | ### Temporal units 47 | (?:ms|s|secs?|mins?|hrs?)\.?| 48 | milliseconds?|seconds?|minutes?|hours?|days?|years?|decades?|century|centuries|millennium|millennia| 49 | 50 | ### Imperial units 51 | (?:in|ft|yd|mi)\.?| 52 | (?:ac|ha|oz|pt|qt|gal|lb|st)\.? 53 | s\.f\.|sf|s\.i\.|si|square[ ]feet|square[ ]foot| 54 | inch|inches|foot|feet|yards?|miles?|acres?|hectares?|ounces?|pints?|quarts?|gallons?|pounds?|stones?| 55 | 56 | ### Metric units (with prefixes) 57 | (?:p|µ|[mcdhkMGT])? 58 | (?:[mgstAKNJWCVFSTHBL]|mol|cd|rad|Hz|Pa|Wb|lm|lx|Bq|Gy|Sv|kat|Ω)| 59 | (?:nano|micro|milli|centi|deci|deka|hecto|kilo|mega|giga|tera)? 60 | (?:liters?|meters?|grams?|newtons?|pascals?|watts?|joules?|amperes?)| 61 | 62 | ### Computers units (KB, Kb, TB, Kbps) 63 | [kKMGT]?(?:[oBb]|[oBb]ps|flops)| 64 | 65 | ### Money 66 | ¢|M?(?:£|¥|€|\$)| 67 | 68 | ### Other units 69 | °[CF]? | 70 | %|pi|M?px|em|en|[NSEOW]|[NS][EOW]|mbar 71 | '; // required modifiers: x (multiline pattern), u (unicode). 72 | 73 | // (?=\p{^L})|\z) is used instead of \b because otherwise the special symbols ($, € etc.) would not match properly (they are not word characters). 74 | const WORD_BOUNDARY = '(?:(?=\p{^L})|\z)'; 75 | 76 | /** 77 | * Creates a new fix object. 78 | * 79 | * @param bool $feed_compatible Optional. Default false. 80 | */ 81 | public function __construct( $feed_compatible = false ) { 82 | parent::__construct( self::REGEX, self::REPLACEMENT, Settings::UNIT_SPACING, $feed_compatible ); 83 | } 84 | 85 | /** 86 | * Apply the fix to a given textnode. 87 | * 88 | * @since 7.0.0 All parameters are now required. 89 | * 90 | * @param \DOMText $textnode The DOM node. 91 | * @param Settings $settings The settings to apply. 92 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 93 | * 94 | * @return void 95 | */ 96 | public function apply( \DOMText $textnode, Settings $settings, $is_title ) { 97 | // Update regex with custom units. 98 | $this->regex = "/(\d\.?)\s({$settings->custom_units}" . self::STANDARD_UNITS . ')' . self::WORD_BOUNDARY . '/Sxu'; 99 | 100 | parent::apply( $textnode, $settings, $is_title ); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/fixes/token-fixes/class-abstract-token-fix.php: -------------------------------------------------------------------------------- 1 | 37 | * 38 | * @since 5.0.0 39 | */ 40 | abstract class Abstract_Token_Fix implements Token_Fix { 41 | 42 | /** 43 | * Is this fix compatible with feeds? 44 | * 45 | * @var bool 46 | */ 47 | private $feed_compatible; 48 | 49 | /** 50 | * The target token type. 51 | * 52 | * @var int 53 | */ 54 | private $target; 55 | 56 | /** 57 | * Creates a new fix instance. 58 | * 59 | * @param int $target Required. 60 | * @param bool $feed_compatible Optional. Default false. 61 | */ 62 | protected function __construct( $target, $feed_compatible = false ) { 63 | $this->target = $target; 64 | $this->feed_compatible = $feed_compatible; 65 | } 66 | 67 | /** 68 | * Apply the fix to a given set of tokens 69 | * 70 | * @since 7.0.0 The parameter order has been re-arranged to mirror Node_Fix. 71 | * 72 | * @param Token[] $tokens The set of tokens. 73 | * @param \DOMText $textnode The context DOM node. 74 | * @param Settings $settings The settings to apply. 75 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 76 | * 77 | * @return Token[] The fixed set of tokens. 78 | */ 79 | abstract public function apply( array $tokens, \DOMText $textnode, Settings $settings, $is_title ); 80 | 81 | /** 82 | * Determines whether the fix should be applied to (RSS) feeds. 83 | * 84 | * @return bool 85 | */ 86 | public function feed_compatible() { 87 | return $this->feed_compatible; 88 | } 89 | 90 | /** 91 | * Retrieves the target token array for this fix. 92 | * 93 | * @return int 94 | */ 95 | public function target() { 96 | return $this->target; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/fixes/token-fixes/class-hyphenate-compounds-fix.php: -------------------------------------------------------------------------------- 1 | 41 | * 42 | * @since 5.0.0 43 | */ 44 | class Hyphenate_Compounds_Fix extends Hyphenate_Fix { 45 | 46 | /** 47 | * Creates a new fix instance. 48 | * 49 | * @param Cache|null $cache Optional. Default null. 50 | * @param bool $feed_compatible Optional. Default false. 51 | */ 52 | public function __construct( Cache $cache = null, $feed_compatible = false ) { 53 | parent::__construct( $cache, Token_Fix::COMPOUND_WORDS, $feed_compatible ); 54 | } 55 | 56 | /** 57 | * Apply the fix to a given set of tokens 58 | * 59 | * @since 7.0.0 The parameter order has been re-arranged to mirror Node_Fix. 60 | * 61 | * @param Token[] $tokens The set of tokens. 62 | * @param \DOMText $textnode The context DOM node. 63 | * @param Settings $settings The settings to apply. 64 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 65 | * 66 | * @return Token[] The fixed set of tokens. 67 | */ 68 | public function apply( array $tokens, \DOMText $textnode, Settings $settings, $is_title ) { 69 | if ( empty( $settings->hyphenate_compounds ) ) { 70 | return $tokens; // abort. 71 | } 72 | 73 | // Hyphenate compound words. 74 | foreach ( $tokens as $key => $word_token ) { 75 | $component_words = []; 76 | $word_parts = \preg_split( '/(-)/', $word_token->value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ) ?: []; // phpcs:ignore Universal.Operators.DisallowShortTernary -- Ensure array type. 77 | foreach ( $word_parts as $word_part ) { 78 | $component_words[] = new Text_Parser\Token( $word_part, Text_Parser\Token::WORD ); 79 | } 80 | 81 | $tokens[ $key ] = $word_token->with_value( 82 | \array_reduce( 83 | parent::apply( $component_words, $textnode, $settings, $is_title ), 84 | function ( ?string $carry, Token $item ): string { 85 | return $carry . $item->value; 86 | }, 87 | '' 88 | ) 89 | ); 90 | } 91 | 92 | return $tokens; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/fixes/token-fixes/class-smart-dashes-hyphen-fix.php: -------------------------------------------------------------------------------- 1 | 38 | * 39 | * @since 6.3.0 40 | */ 41 | class Smart_Dashes_Hyphen_Fix extends Abstract_Token_Fix { 42 | 43 | /** 44 | * Creates a new fix instance. 45 | * 46 | * @param bool $feed_compatible Optional. Default false. 47 | */ 48 | public function __construct( $feed_compatible = false ) { 49 | parent::__construct( Token_Fix::MIXED_WORDS, $feed_compatible ); 50 | } 51 | 52 | /** 53 | * Apply the fix to a given set of tokens 54 | * 55 | * @since 7.0.0 The parameter order has been re-arranged to mirror Node_Fix. 56 | * 57 | * @param Token[] $tokens The set of tokens. 58 | * @param \DOMText $textnode The context DOM node. 59 | * @param Settings $settings The settings to apply. 60 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 61 | * 62 | * @return Token[] The fixed set of tokens. 63 | */ 64 | public function apply( array $tokens, \DOMText $textnode, Settings $settings, $is_title ) { 65 | if ( ! empty( $settings->smart_dashes ) ) { 66 | foreach ( $tokens as $index => $text_token ) { 67 | // Handled here because we need to know we are inside a word and not a URL. 68 | $tokens[ $index ] = $text_token->with_value( \str_replace( '-', U::HYPHEN, $text_token->value ) ); 69 | } 70 | } 71 | 72 | return $tokens; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/fixes/token-fixes/class-wrap-emails-fix.php: -------------------------------------------------------------------------------- 1 | 39 | * 40 | * @since 5.0.0 41 | */ 42 | class Wrap_Emails_Fix extends Abstract_Token_Fix { 43 | 44 | /** 45 | * A regular expression matching email addresses. 46 | * 47 | * @var string 48 | */ 49 | protected $email_pattern; 50 | 51 | /** 52 | * Creates a new fix instance. 53 | * 54 | * @param bool $feed_compatible Optional. Default false. 55 | */ 56 | public function __construct( $feed_compatible = false ) { 57 | parent::__construct( Token_Fix::OTHER, $feed_compatible ); 58 | 59 | $this->email_pattern = "/(?: 60 | \A 61 | [a-z0-9\!\#\$\%\&\'\*\+\/\=\?\^\_\`\{\|\}\~\-]+ 62 | (?: 63 | \. 64 | [a-z0-9\!\#\$\%\&\'\*\+\/\=\?\^\_\`\{\|\}\~\-]+ 65 | )* 66 | @ 67 | (?: 68 | [a-z0-9] 69 | [a-z0-9\-]{0,61} 70 | [a-z0-9] 71 | \. 72 | )+ 73 | (?: 74 | " . RE::top_level_domains() . ' 75 | ) 76 | \Z 77 | )/Sxi'; // required modifiers: x (multiline pattern) i (case insensitive). 78 | } 79 | 80 | /** 81 | * Apply the fix to a given set of tokens 82 | * 83 | * @since 7.0.0 The parameter order has been re-arranged to mirror Node_Fix. 84 | * 85 | * @param Token[] $tokens The set of tokens. 86 | * @param \DOMText $textnode The context DOM node. 87 | * @param Settings $settings The settings to apply. 88 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 89 | * 90 | * @return Token[] The fixed set of tokens. 91 | */ 92 | public function apply( array $tokens, \DOMText $textnode, Settings $settings, $is_title ) { 93 | if ( empty( $settings->wrap_emails ) ) { 94 | return $tokens; 95 | } 96 | 97 | // Test for and parse urls. 98 | foreach ( $tokens as $index => $token ) { 99 | $value = $token->value; 100 | if ( \preg_match( $this->email_pattern, $value, $email_match ) ) { 101 | $tokens[ $index ] = $token->with_value( (string) \preg_replace( '/([^a-zA-Z0-9])/S', '$1' . U::ZERO_WIDTH_SPACE, $value ) ); 102 | } 103 | } 104 | 105 | return $tokens; 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/fixes/token-fixes/class-wrap-hard-hyphens-fix.php: -------------------------------------------------------------------------------- 1 | 38 | * 39 | * @since 5.0.0 40 | */ 41 | class Wrap_Hard_Hyphens_Fix extends Abstract_Token_Fix { 42 | 43 | /** 44 | * An array of "hyphen-like" characters. 45 | * 46 | * @var string[] 47 | */ 48 | protected $hyphens_array; 49 | 50 | /** 51 | * The regular expression to strip the space from hyphen-like characters at the end of a string. 52 | * 53 | * @var string 54 | */ 55 | protected $remove_ending_space_regex; 56 | 57 | /** 58 | * Creates a new fix instance. 59 | * 60 | * @param bool $feed_compatible Optional. Default false. 61 | */ 62 | public function __construct( $feed_compatible = false ) { 63 | parent::__construct( Token_Fix::MIXED_WORDS, $feed_compatible ); 64 | 65 | $this->hyphens_array = \array_unique( [ '-', U::HYPHEN ] ); 66 | $this->remove_ending_space_regex = '/(' . \implode( '|', $this->hyphens_array ) . ')' . U::ZERO_WIDTH_SPACE . '$/'; 67 | } 68 | 69 | /** 70 | * Apply the fix to a given set of tokens 71 | * 72 | * @since 7.0.0 The parameter order has been re-arranged to mirror Node_Fix. 73 | * 74 | * @param Token[] $tokens The set of tokens. 75 | * @param \DOMText $textnode The context DOM node. 76 | * @param Settings $settings The settings to apply. 77 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 78 | * 79 | * @return Token[] The fixed set of tokens. 80 | */ 81 | public function apply( array $tokens, \DOMText $textnode, Settings $settings, $is_title ) { 82 | if ( ! empty( $settings->wrap_hard_hyphens ) ) { 83 | 84 | foreach ( $tokens as $index => $text_token ) { 85 | $value = $text_token->value; 86 | 87 | $value = \str_replace( $this->hyphens_array, '-' . U::ZERO_WIDTH_SPACE, $value ); 88 | $value = \str_replace( '_', '_' . U::ZERO_WIDTH_SPACE, $value ); 89 | $value = \str_replace( '/', '/' . U::ZERO_WIDTH_SPACE, $value ); 90 | 91 | $value = (string) \preg_replace( $this->remove_ending_space_regex, '$1', $value ); 92 | 93 | $tokens[ $index ] = $text_token->with_value( $value ); 94 | } 95 | } 96 | 97 | return $tokens; 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/hyphenator/class-cache.php: -------------------------------------------------------------------------------- 1 | 37 | */ 38 | class Cache { 39 | 40 | /** 41 | * An array of Hyphenator instances indexed by language. 42 | * 43 | * @var array 44 | */ 45 | protected array $cache = []; 46 | 47 | /** 48 | * A flag that indicated that the cache has changed since creation/deserialization. 49 | * 50 | * @var bool 51 | */ 52 | protected bool $changed = false; 53 | 54 | /** 55 | * Ignore the "changed" flag during serialization. 56 | * 57 | * @return string[] 58 | */ 59 | public function __sleep(): array { 60 | return [ 61 | 'cache', 62 | ]; 63 | } 64 | 65 | /** 66 | * Caches a Hyphenator instance. 67 | * 68 | * @param string $lang A language code. 69 | * @param Hyphenator $hyphenator The object to cache. 70 | */ 71 | public function set_hyphenator( string $lang, Hyphenator $hyphenator ): void { 72 | $this->cache[ $lang ] = $hyphenator; 73 | $this->changed = true; 74 | } 75 | 76 | /** 77 | * Retrieves a cached Hyphenator. 78 | * 79 | * @param string $lang A language code. 80 | * 81 | * @return ?Hyphenator 82 | */ 83 | public function get_hyphenator( string $lang ): ?Hyphenator { 84 | if ( isset( $this->cache[ $lang ] ) ) { 85 | return $this->cache[ $lang ]; 86 | } 87 | 88 | return null; 89 | } 90 | 91 | /** 92 | * Determines whether the cache (not its content) has been modified since 93 | * instance creatino (or deserialization). 94 | * 95 | * @return bool 96 | */ 97 | public function has_changed(): bool { 98 | return $this->changed; 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/hyphenator/class-trie-node.php: -------------------------------------------------------------------------------- 1 | 36 | * 37 | * @since 5.0.0 38 | */ 39 | final class Trie_Node { 40 | 41 | /** 42 | * The offsets array. 43 | * 44 | * @var array 45 | */ 46 | private array $offsets = []; 47 | 48 | /** 49 | * Linked trie nodes. 50 | * 51 | * @var array { 52 | * @type Trie_Node $char The next node in the given character path. 53 | * } 54 | */ 55 | private array $links = []; 56 | 57 | /** 58 | * Create new Trie_Node. 59 | */ 60 | private function __construct() { 61 | } 62 | 63 | /** 64 | * Retrieves the node for the given letter (or creates it). 65 | * 66 | * @param string $char A single character. 67 | * 68 | * @return Trie_Node 69 | */ 70 | public function get_node( string $char ): Trie_Node { 71 | if ( ! isset( $this->links[ $char ] ) ) { 72 | $this->links[ $char ] = new Trie_Node(); 73 | } 74 | 75 | return $this->links[ $char ]; 76 | } 77 | 78 | /** 79 | * Checks if there is a node for the given letter. 80 | * 81 | * @param string $char A single character. 82 | * 83 | * @return bool 84 | */ 85 | public function exists( string $char ): bool { 86 | return ! empty( $this->links[ $char ] ); 87 | } 88 | 89 | /** 90 | * Retrieves the offsets array. 91 | * 92 | * @return array 93 | */ 94 | public function offsets(): array { 95 | return $this->offsets; 96 | } 97 | 98 | /** 99 | * Builds pattern search trie from pattern list(s). 100 | * 101 | * @param array $patterns An array of hyphenation patterns. 102 | * 103 | * @return Trie_Node The starting node of the trie. 104 | */ 105 | public static function build_trie( array $patterns ): Trie_Node { 106 | $trie = new Trie_Node(); 107 | 108 | foreach ( $patterns as $key => $pattern ) { 109 | $node = $trie; 110 | 111 | foreach ( \mb_str_split( $key ) as $char ) { 112 | $node = $node->get_node( $char ); 113 | } 114 | 115 | \preg_match_all( '/([1-9])/S', $pattern, $offsets, \PREG_OFFSET_CAPTURE ); 116 | $node->offsets = $offsets[1]; // @phpstan-ignore-line -- The array contains only ints because of the regex. 117 | } 118 | 119 | return $trie; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/settings/class-dashes.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 5.0.0 35 | * @since 7.0.0 Changed into an abstract class. 36 | */ 37 | abstract class Dashes implements \JsonSerializable { 38 | 39 | /** 40 | * Retrieves the dash used for interval dashes. 41 | * 42 | * @return string 43 | */ 44 | abstract public function interval_dash(): string; 45 | 46 | /** 47 | * Retrieves the space character used around interval dashes. 48 | * 49 | * @return string 50 | */ 51 | abstract public function interval_space(): string; 52 | 53 | /** 54 | * Retrieves the dash used for parenthetical dashes. 55 | * 56 | * @return string 57 | */ 58 | abstract public function parenthetical_dash(): string; 59 | 60 | /** 61 | * Retrieves the space character used around parenthetical dashes. 62 | * 63 | * @return string 64 | */ 65 | abstract public function parenthetical_space(): string; 66 | 67 | /** 68 | * Provides a JSON serialization of the settings. 69 | * 70 | * @since 7.0.0 71 | * 72 | * @return mixed 73 | */ 74 | #[\ReturnTypeWillChange] 75 | public function jsonSerialize() { 76 | return [ 77 | 'interval_dash' => $this->interval_dash(), 78 | 'interval_space' => $this->interval_space(), 79 | 'parenthetical_dash' => $this->parenthetical_dash(), 80 | 'parenthetical_space' => $this->parenthetical_space(), 81 | ]; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/settings/class-quotes.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 5.0.0 35 | * @since 7.0.0 Changed to abstract class. 36 | */ 37 | abstract class Quotes implements \JsonSerializable { 38 | 39 | /** 40 | * Retrieves the styles opening quote characters. 41 | * 42 | * @return string 43 | */ 44 | abstract public function open(): string; 45 | 46 | /** 47 | * Retrieves the styles closing quote characters. 48 | * 49 | * @return string 50 | */ 51 | abstract public function close(): string; 52 | 53 | /** 54 | * Provides a JSON serialization of the settings. 55 | * 56 | * @since 7.0.0 57 | * 58 | * @return mixed 59 | */ 60 | #[\ReturnTypeWillChange] 61 | public function jsonSerialize() { 62 | return [ 63 | 'open' => $this->open(), 64 | 'close' => $this->close(), 65 | ]; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/settings/class-simple-dashes.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 5.0.0 35 | */ 36 | final class Simple_Dashes extends Dashes { 37 | 38 | /** 39 | * The dash character used for parenthetical dashes. 40 | * 41 | * @var string 42 | */ 43 | private $parenthetical_dash; 44 | 45 | /** 46 | * The space character used around parenthetical dashes. 47 | * 48 | * @var string 49 | */ 50 | private string $parenthetical_space; 51 | 52 | /** 53 | * The dash character used for interval dashes. 54 | * 55 | * @var string 56 | */ 57 | private string $interval_dash; 58 | 59 | /** 60 | * The space character used around interval dashes. 61 | * 62 | * @var string 63 | */ 64 | private string $interval_space; 65 | 66 | /** 67 | * Creates a new quotes object. 68 | * 69 | * @param string $parenthetical The dash character used for parenthetical dashes. 70 | * @param string $parenthetical_space The space character used around parenthetical dashes. 71 | * @param string $interval The dash character used for interval dashes. 72 | * @param string $interval_space The space character used around interval dashes. 73 | */ 74 | public function __construct( $parenthetical, $parenthetical_space, $interval, $interval_space ) { 75 | $this->parenthetical_dash = $parenthetical; 76 | $this->parenthetical_space = $parenthetical_space; 77 | $this->interval_dash = $interval; 78 | $this->interval_space = $interval_space; 79 | } 80 | 81 | /** 82 | * Retrieves the dash used for interval dashes. 83 | * 84 | * @return string 85 | */ 86 | public function interval_dash(): string { 87 | return $this->interval_dash; 88 | } 89 | 90 | /** 91 | * Retrieves the space character used around interval dashes. 92 | * 93 | * @return string 94 | */ 95 | public function interval_space(): string { 96 | return $this->interval_space; 97 | } 98 | 99 | /** 100 | * Retrieves the dash used for parenthetical dashes. 101 | * 102 | * @return string 103 | */ 104 | public function parenthetical_dash(): string { 105 | return $this->parenthetical_dash; 106 | } 107 | 108 | /** 109 | * Retrieves the space character used around parenthetical dashes. 110 | * 111 | * @return string 112 | */ 113 | public function parenthetical_space(): string { 114 | return $this->parenthetical_space; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/settings/class-simple-quotes.php: -------------------------------------------------------------------------------- 1 | 33 | * 34 | * @since 5.0.0 35 | */ 36 | final class Simple_Quotes extends Quotes { 37 | 38 | /** 39 | * Opening quote character(s). 40 | * 41 | * @var string 42 | */ 43 | private string $open; 44 | 45 | /** 46 | * Closing quote character(s). 47 | * 48 | * @var string 49 | */ 50 | private string $close; 51 | 52 | /** 53 | * Creates a new quotes object. 54 | * 55 | * @param string $open Opening quote character(s). 56 | * @param string $close Closing quote character(s). 57 | */ 58 | public function __construct( $open, $close ) { 59 | $this->open = $open; 60 | $this->close = $close; 61 | } 62 | 63 | /** 64 | * Retrieves the quote styles opening quote characters. 65 | * 66 | * @return string 67 | */ 68 | public function open(): string { 69 | return $this->open; 70 | } 71 | 72 | /** 73 | * Retrieves the quote styles closing quote characters. 74 | * 75 | * @return string 76 | */ 77 | public function close(): string { 78 | return $this->close; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /tests/benchmarks/class-dom-bench.php: -------------------------------------------------------------------------------- 1 | 'A short fragment 1+2=3', 49 | ], 50 | [ 51 | 'html' => "
Test
", 52 | ], 53 | ]; 54 | } 55 | 56 | /** 57 | * Set up fixtures. 58 | * 59 | * @param array $params Parameters. 60 | */ 61 | public function set_up( array $params ) { 62 | $parser = new \Masterminds\HTML5( [ 'disable_html_ns' => true ] ); 63 | $this->dom = $parser->loadHTML( $params['html'] ); 64 | } 65 | 66 | /** 67 | * Benchmark the process method. 68 | * 69 | * @ParamProviders({"provide_dom"}) 70 | * 71 | * @param array $params The parameters. 72 | */ 73 | public function bench_get_element_by_tag_name( $params ) { 74 | for ( $i = 0; $i < 1000; ++$i ) { 75 | $body_node = $this->dom->getElementsByTagName( 'body' )->item( 0 ); 76 | } 77 | } 78 | 79 | /** 80 | * Benchmark the process method. 81 | * 82 | * @ParamProviders({"provide_dom"}) 83 | * 84 | * @param array $params The parameters. 85 | */ 86 | public function bench_xpath_query( $params ) { 87 | for ( $i = 0; $i < 1000; ++$i ) { 88 | $xpath = new \DOMXPath( $this->dom ); 89 | $body_node = $xpath->query( '/html/body' )->item( 0 ); 90 | } 91 | } 92 | 93 | /** 94 | * Benchmark the process method. 95 | * 96 | * @ParamProviders({"provide_dom"}) 97 | * 98 | * @param array $params The parameters. 99 | */ 100 | public function bench_xpath_query_without_object_creation( $params ) { 101 | $xpath = new \DOMXPath( $this->dom ); 102 | 103 | for ( $i = 0; $i < 1000; ++$i ) { 104 | $body_node = $xpath->query( '/html/body' )->item( 0 ); 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /tests/benchmarks/class-html-parser-bench.php: -------------------------------------------------------------------------------- 1 | html5_parser = new \Masterminds\HTML5( [ 'disable_html_ns' => true ] ); 49 | } 50 | 51 | /** 52 | * Provide parameters for process_bench. 53 | * 54 | * @return array 55 | */ 56 | public function provide_process_filenames() { 57 | return [ 58 | 59 | /* 60 | [ 61 | 'filename' => __DIR__ . '/data/example1.html', 62 | ], 63 | */ 64 | [ 65 | 'html' => 'A short fragment 1+2=3', 66 | ], 67 | [ 68 | 'html' => '
Test
', 69 | ], 70 | ]; 71 | } 72 | 73 | /** 74 | * Benchmark the process method. 75 | * 76 | * @ParamProviders({"provide_process_filenames"}) 77 | * 78 | * @param array $params The parameters. 79 | */ 80 | public function bench_process( $params ) { 81 | 82 | if ( isset( $params['filename'] ) ) { 83 | $html = \file_get_contents( $params['filename'] ); 84 | } else { 85 | $html = $params['html']; 86 | } 87 | 88 | $dom = $this->html5_parser->loadHTML( "{$html}" ); 89 | $dom->encoding = 'UTF-8'; 90 | $xpath = new \DOMXPath( $dom ); 91 | $body_node = $xpath->query( '/html/body' )->item( 0 ); 92 | 93 | $result = $this->html5_parser->saveHTML( $body_node->childNodes ); 94 | if ( $html !== $result ) { 95 | echo "*********\n"; 96 | echo $html; 97 | echo "\n"; 98 | echo $result; 99 | echo "\n"; 100 | 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /tests/benchmarks/class-hyphenator-cache-bench.php: -------------------------------------------------------------------------------- 1 | hyphenator = new Hyphenator( 'de' ); 65 | $this->serialized = serialize( $this->hyphenator ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.serialize_serialize 66 | $this->compressed = base64_encode( gzcompress( $this->serialized ) ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.obfuscation_base64_encode 67 | } 68 | 69 | /** 70 | * Benchmark new object creation. 71 | */ 72 | public function bench_new_hyphenator() { 73 | $de_hyphen = new Hyphenator( 'de' ); 74 | } 75 | 76 | /** 77 | * Benchmark new object creation. 78 | */ 79 | public function bench_serialized_hyphenator() { 80 | $de_hyphen = unserialize( $this->serialized ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions 81 | } 82 | 83 | /** 84 | * Benchmark new object creation. 85 | */ 86 | public function bench_compressed_serialized_hyphenator() { 87 | $de_hyphen = unserialize( gzuncompress( base64_decode( $this->compressed ) ) ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions 88 | } 89 | 90 | /** 91 | * Benchmark new object creation. 92 | */ 93 | public function bench_unserialize_serialized_hyphenator() { 94 | $de_hyphen = unserialize( serialize( $this->hyphenator ) ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions 95 | } 96 | 97 | /** 98 | * Benchmark new object creation. 99 | */ 100 | public function bench_compressed_unserialize_serialized_hyphenator() { 101 | $de_hyphen = unserialize( gzuncompress( base64_decode( base64_encode( gzcompress( serialize( $this->hyphenator ) ) ) ) ) ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions 102 | } 103 | 104 | /** 105 | * Benchmark new object creation. 106 | */ 107 | public function bench_gzencoded_unserialize_serialized_hyphenator() { 108 | $de_hyphen = unserialize( gzdecode( base64_decode( base64_encode( gzencode( serialize( $this->hyphenator ) ) ) ) ) ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /tests/benchmarks/class-php-typography-bench.php: -------------------------------------------------------------------------------- 1 | typo = new \PHP_Typography\PHP_Typography(); 56 | $this->settings = new \PHP_Typography\Settings(); 57 | 58 | $this->typo->process( '', $this->settings ); 59 | } 60 | 61 | /** 62 | * Provide parameters for process_bench. 63 | * 64 | * @return array 65 | */ 66 | public function provide_process_filenames() { 67 | return [ 68 | [ 69 | 'filename' => __DIR__ . '/data/example1.html', 70 | ], 71 | [ 72 | 'html' => 'A short fragment 1+2=3', 73 | ], 74 | ]; 75 | } 76 | 77 | /** 78 | * Benchmark the process method. 79 | * 80 | * @ParamProviders({"provide_process_filenames"}) 81 | * 82 | * @param array $params The parameters. 83 | */ 84 | public function bench_process( $params ) { 85 | 86 | if ( isset( $params['filename'] ) ) { 87 | $html = \file_get_contents( $params['filename'] ); 88 | } else { 89 | $html = $params['html']; 90 | } 91 | 92 | $this->typo->process( $html, $this->settings ); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /tests/bootstrap.php: -------------------------------------------------------------------------------- 1 | $function ) { 47 | if ( 'u' !== $name ) { 48 | $this->assertTrue( is_callable( $function ) ); 49 | } 50 | } 51 | } 52 | 53 | /** 54 | * Test ::functions. 55 | * 56 | * @covers ::functions 57 | */ 58 | public function test_functions() { 59 | $func_ascii = Strings::functions( 'ASCII' ); 60 | $func_utf8 = Strings::functions( 'UTF-8 üäß' ); 61 | 62 | // We are dealing with ararys. 63 | $this->assertTrue( is_array( $func_ascii ) ); 64 | $this->assertTrue( is_array( $func_utf8 ) ); 65 | 66 | // The arrays are not (almost) empty. 67 | $this->assertGreaterThan( 1, count( $func_ascii ), 'ASCII array contains fewer than 2 functions.' ); 68 | $this->assertGreaterThan( 1, count( $func_utf8 ), 'UTF-8 array contains fewer than 2 functions.' ); 69 | 70 | // The keys are identical. 71 | $this->assertSame( array_keys( $func_ascii ), array_keys( $func_utf8 ) ); 72 | 73 | // Each function is a callable (except for the 'u' modifier string). 74 | $this->assert_string_functions( $func_ascii ); 75 | $this->assert_string_functions( $func_utf8 ); 76 | } 77 | 78 | /** 79 | * Test ::functions. 80 | * 81 | * @covers ::functions 82 | */ 83 | public function test_functions_invalid_encoding() { 84 | $this->expect_exception( Invalid_Encoding_Exception::class ); 85 | 86 | $this->assertEmpty( Strings::functions( \mb_convert_encoding( 'Ungültiges Encoding', 'ISO-8859-2' ) ) ); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-classes-dependent-fix-test.php: -------------------------------------------------------------------------------- 1 | getMockForAbstractClass( Node_Fixes\Classes_Dependent_Fix::class, [ [ 'foo', 'bar' ], false ] ); 55 | 56 | $this->assert_attribute_contains( 'foo', 'classes_to_avoid', $fix, 'The fixer should avoid class "foo".' ); 57 | $this->assert_attribute_contains( 'bar', 'classes_to_avoid', $fix, 'The fixer should avoid class "bar".' ); 58 | $this->assert_attribute_not_contains( 'foobar', 'classes_to_avoid', $fix, 'The fixer should not care about class "foobar".' ); 59 | } 60 | 61 | /** 62 | * Tests the constructor. 63 | * 64 | * @covers ::__construct 65 | * 66 | * @uses PHP_Typography\Fixes\Node_Fixes\Abstract_Node_Fix::__construct 67 | */ 68 | public function test_string_constructor() { 69 | $fix = $this->getMockForAbstractClass( Node_Fixes\Classes_Dependent_Fix::class, [ 'bar', false ] ); 70 | 71 | $this->assert_attribute_contains( 'bar', 'classes_to_avoid', $fix, 'The fixer should avoid class "bar".' ); 72 | $this->assert_attribute_not_contains( 'foo', 'classes_to_avoid', $fix, 'The fixer should not care about class "foobar".' ); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-node-fix-testcase.php: -------------------------------------------------------------------------------- 1 | s = new Settings( true ); 58 | } 59 | 60 | /** 61 | * Create a normalilzed textnode. 62 | * 63 | * @param string $value Required. 64 | * 65 | * @return \DOMText 66 | */ 67 | protected function create_textnode( $value ) { 68 | return new \DOMText( html_entity_decode( $value ) ); 69 | } 70 | 71 | /** 72 | * Assert that the output of the fix is the same as the expected result. 73 | * 74 | * @param string $input Text node value. 75 | * @param string $result Expected result. 76 | * @param string|\DOMNode|null $left_sibling Optional. Left sibling node value. Default null. 77 | * @param string|\DOMNode|null $right_sibling Optional. Right sibling node value. Default null. 78 | * @param string $parent_tag Optional. Parent tag. Default 'p'. 79 | * @param bool $is_title Optional. Default false. 80 | */ 81 | protected function assertFixResultSame( $input, $result, $left_sibling = null, $right_sibling = null, $parent_tag = 'p', $is_title = false ) { 82 | $node = $this->create_textnode( $input ); 83 | 84 | if ( ! empty( $left_sibling ) || ! empty( $right_sibling ) ) { 85 | $dom = new \DOMDocument(); 86 | $parent = new \DOMElement( $parent_tag ); 87 | $dom->appendChild( $parent ); 88 | 89 | if ( ! empty( $left_sibling ) ) { 90 | if ( ! $left_sibling instanceof \DOMNode ) { 91 | $left_sibling = $this->create_textnode( $left_sibling ); 92 | } 93 | 94 | $parent->appendChild( $left_sibling ); 95 | } 96 | 97 | $parent->appendChild( $node ); 98 | 99 | if ( ! empty( $right_sibling ) ) { 100 | if ( ! $right_sibling instanceof \DOMNode ) { 101 | $right_sibling = $this->create_textnode( $right_sibling ); 102 | } 103 | 104 | $parent->appendChild( $right_sibling ); 105 | } 106 | } 107 | 108 | $this->fix->apply( $node, $this->s, $is_title ); 109 | $this->assertSame( $this->clean_html( $result ), $this->clean_html( str_replace( [ RE::ESCAPED_HTML_OPEN, RE::ESCAPED_HTML_CLOSE ], [ '<', '>' ], $node->data ) ) ); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-simple-regex-replacement-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = $this->getMockForAbstractClass( Node_Fixes\Simple_Regex_Replacement_Fix::class, [ '/(.*)/', '*$1*', 'fooBar' ] ); 56 | 57 | $this->assert_attribute_same( '/(.*)/S', 'regex', $this->fix ); 58 | $this->assert_attribute_same( 'fooBar', 'settings_switch', $this->fix ); 59 | $this->assert_attribute_same( '*$1*', 'replacement', $this->fix ); 60 | } 61 | 62 | /** 63 | * Provide data for testing apply_internal. 64 | * 65 | * @return array 66 | */ 67 | public function provide_apply_data() { 68 | return [ 69 | [ 'foo & bar', '*foo & bar*' ], 70 | ]; 71 | } 72 | 73 | /** 74 | * Test apply. 75 | * 76 | * @covers ::apply 77 | * 78 | * @uses PHP_Typography\Text_Parser 79 | * @uses PHP_Typography\Text_Parser\Token 80 | * 81 | * @dataProvider provide_apply_data 82 | * 83 | * @param string $input HTML input. 84 | * @param string $result Expected result. 85 | */ 86 | public function test_apply( $input, $result ) { 87 | $this->fix = $this->getMockForAbstractClass( Node_Fixes\Simple_Regex_Replacement_Fix::class, [ '/(.+)/u', '*$1*', 'styleAmpersands' ] ); 88 | $this->s->set_style_ampersands( true ); 89 | 90 | $this->assertFixResultSame( $input, $result ); 91 | } 92 | 93 | /** 94 | * Test apply. 95 | * 96 | * @covers ::apply 97 | * 98 | * @uses PHP_Typography\Text_Parser 99 | * @uses PHP_Typography\Text_Parser\Token 100 | * 101 | * @dataProvider provide_apply_data 102 | * 103 | * @param string $input HTML input. 104 | * @param string $result Expected result. 105 | */ 106 | public function test_apply_off( $input, $result ) { 107 | $this->fix = $this->getMockForAbstractClass( Node_Fixes\Simple_Regex_Replacement_Fix::class, [ '/(.+)/u', '*$1*', 'styleAmpersands' ] ); 108 | $this->s->set_style_ampersands( false ); 109 | 110 | $this->assertFixResultSame( $input, $input ); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-simple-style-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = $this->getMockForAbstractClass( Node_Fixes\Simple_Style_Fix::class, [ '/(.*)/', 'fooBar', 'some-class' ] ); 57 | 58 | $this->assert_attribute_same( '/(.*)/', 'regex', $this->fix ); 59 | $this->assert_attribute_same( 'fooBar', 'settings_switch', $this->fix ); 60 | $this->assert_attribute_same( 'some-class', 'css_class', $this->fix ); 61 | } 62 | 63 | /** 64 | * Provide data for testing apply_internal. 65 | * 66 | * @return array 67 | */ 68 | public function provide_apply_internal_data() { 69 | return [ 70 | [ 'foo & bar', 'foo & bar' ], 71 | ]; 72 | } 73 | 74 | /** 75 | * Test apply. 76 | * 77 | * @covers ::apply_internal 78 | * 79 | * @uses ::apply 80 | * @uses PHP_Typography\Text_Parser 81 | * @uses PHP_Typography\Text_Parser\Token 82 | * @uses PHP_Typography\RE::escape_tags 83 | * 84 | * @dataProvider provide_apply_internal_data 85 | * 86 | * @param string $input HTML input. 87 | * @param string $result Expected result. 88 | */ 89 | public function test_apply( $input, $result ) { 90 | $this->fix = $this->getMockForAbstractClass( Node_Fixes\Simple_Style_Fix::class, [ '/(.+)/u', 'styleAmpersands', 'some-class' ] ); 91 | $this->s->set_style_ampersands( true ); 92 | 93 | $this->assertFixResultSame( $input, $result ); 94 | } 95 | 96 | /** 97 | * Test apply. 98 | * 99 | * @covers ::apply_internal 100 | * 101 | * @uses ::apply 102 | * @uses PHP_Typography\Text_Parser 103 | * @uses PHP_Typography\Text_Parser\Token 104 | * 105 | * @dataProvider provide_apply_internal_data 106 | * 107 | * @param string $input HTML input. 108 | * @param string $result Expected result. 109 | */ 110 | public function test_apply_off( $input, $result ) { 111 | $this->fix = $this->getMockForAbstractClass( Node_Fixes\Simple_Style_Fix::class, [ '/(.+)/u', 'styleAmpersands', 'some-class' ] ); 112 | $this->s->set_style_ampersands( false ); 113 | 114 | $this->assertFixResultSame( $input, $input ); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-smart-area-units-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = new Node_Fixes\Smart_Area_Units_Fix(); 55 | } 56 | 57 | /** 58 | * Providate data for testing smart exponents. 59 | * 60 | * @return array 61 | */ 62 | public function provide_smart_area_and_volume_units_data() { 63 | return [ 64 | [ '10 m2', '10 m²' ], 65 | [ '5.3 m3', '5.3 m³' ], 66 | [ '10 cm2', '10 cm²' ], 67 | [ '10,20 mm2', '10,20 mm²' ], 68 | [ '5 m2, das ergibt 5000000mm2', '5 m², das ergibt 5000000 mm²' ], 69 | [ '3 µm2', '3 µm²' ], 70 | [ '2m2', '2 m²' ], 71 | ]; 72 | } 73 | 74 | /** 75 | * Test apply. 76 | * 77 | * @covers ::apply 78 | * 79 | * @dataProvider provide_smart_area_and_volume_units_data 80 | * 81 | * @param string $input HTML input. 82 | * @param string $result Expected result. 83 | */ 84 | public function test_apply( $input, $result ) { 85 | $this->s->set_smart_area_units( true ); 86 | 87 | $this->assertFixResultSame( $input, $result ); 88 | } 89 | 90 | /** 91 | * Test apply. 92 | * 93 | * @covers ::apply 94 | * 95 | * @dataProvider provide_smart_area_and_volume_units_data 96 | * 97 | * @param string $input HTML input. 98 | * @param string $result Expected result. 99 | */ 100 | public function test_apply_off( $input, $result ) { 101 | $this->s->set_smart_area_units( false ); 102 | 103 | $this->assertFixResultSame( $input, $input ); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-smart-ellipses-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = new Node_Fixes\Smart_Ellipses_Fix(); 54 | } 55 | 56 | /** 57 | * Provide data for testing smart_ellipses. 58 | * 59 | * @return array 60 | */ 61 | public function provide_smart_ellipses_data() { 62 | return [ 63 | [ 'Where are we going... Really....?', 'Where are we going… Really.…?' ], 64 | ]; 65 | } 66 | 67 | /** 68 | * Test apply. 69 | * 70 | * @covers ::apply 71 | * 72 | * @dataProvider provide_smart_ellipses_data 73 | * 74 | * @param string $input HTML input. 75 | * @param string $result Expected result. 76 | */ 77 | public function test_apply( $input, $result ) { 78 | $this->s->set_smart_ellipses( true ); 79 | 80 | $this->assertFixResultSame( $input, $result ); 81 | } 82 | 83 | /** 84 | * Test apply. 85 | * 86 | * @covers ::apply 87 | * 88 | * @dataProvider provide_smart_ellipses_data 89 | * 90 | * @param string $input HTML input. 91 | * @param string $result Expected result. 92 | */ 93 | public function test_apply_off( $input, $result ) { 94 | $this->s->set_smart_ellipses( false ); 95 | 96 | $this->assertFixResultSame( $input, $input ); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-smart-exponents-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = new Node_Fixes\Smart_Exponents_Fix(); 56 | } 57 | 58 | /** 59 | * Providate data for testing smart exponents. 60 | * 61 | * @return array 62 | */ 63 | public function provide_smart_exponents_data() { 64 | return [ 65 | [ '10^12', '1012' ], 66 | ]; 67 | } 68 | 69 | /** 70 | * Test apply. 71 | * 72 | * @covers ::__construct 73 | * 74 | * @uses PHP_Typography\Fixes\Node_Fixes\Simple_Regex_Replacement_Fix::apply 75 | * @uses PHP_Typography\RE::escape_tags 76 | * 77 | * @dataProvider provide_smart_exponents_data 78 | * 79 | * @param string $input HTML input. 80 | * @param string $result Expected result. 81 | */ 82 | public function test_apply( $input, $result ) { 83 | $this->s->set_smart_exponents( true ); 84 | 85 | $this->assertFixResultSame( $input, $result ); 86 | } 87 | 88 | /** 89 | * Test apply. 90 | * 91 | * @covers ::__construct 92 | * 93 | * @uses PHP_Typography\Fixes\Node_Fixes\Simple_Regex_Replacement_Fix::apply 94 | * @uses PHP_Typography\RE::escape_tags 95 | * 96 | * @dataProvider provide_smart_exponents_data 97 | * 98 | * @param string $input HTML input. 99 | * @param string $result Expected result. 100 | */ 101 | public function test_apply_off( $input, $result ) { 102 | $this->s->set_smart_exponents( false ); 103 | 104 | $this->assertFixResultSame( $input, $input ); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-smart-marks-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = new Node_Fixes\Smart_Marks_Fix(); 55 | } 56 | 57 | /** 58 | * Tests the constructor. 59 | * 60 | * @covers ::__construct 61 | */ 62 | public function test_constructor() { 63 | $this->fix = $this->getMockForAbstractClass( Node_Fixes\Smart_Marks_Fix::class, [ false ] ); 64 | 65 | $this->assert_is_array( $this->get_value( $this->fix, 'marks' ) ); 66 | $this->assert_is_array( $this->get_value( $this->fix, 'replacements' ) ); 67 | } 68 | 69 | /** 70 | * Provide data for testing smart_marks. 71 | * 72 | * @return array 73 | */ 74 | public function provide_smart_marks_data() { 75 | return [ 76 | [ '(c)', '©' ], 77 | [ '(C)', '©' ], 78 | [ '(r)', '®' ], 79 | [ '(R)', '®' ], 80 | [ '(p)', '℗' ], 81 | [ '(P)', '℗' ], 82 | [ '(sm)', '℠' ], 83 | [ '(SM)', '℠' ], 84 | [ '(tm)', '™' ], 85 | [ '(TM)', '™' ], 86 | [ '501(c)(1)', '501(c)(1)' ], // protected. 87 | [ '501(c)(29)', '501(c)(29)' ], // protected. 88 | [ '501(c)(30)', '501©(30)' ], // not protected. 89 | ]; 90 | } 91 | 92 | /** 93 | * Test apply. 94 | * 95 | * @covers ::apply 96 | * 97 | * @dataProvider provide_smart_marks_data 98 | * 99 | * @param string $input HTML input. 100 | * @param string $result Expected result. 101 | */ 102 | public function test_apply( $input, $result ) { 103 | $this->s->set_smart_marks( true ); 104 | 105 | $this->assertFixResultSame( $input, $result ); 106 | } 107 | 108 | /** 109 | * Test apply. 110 | * 111 | * @covers ::apply 112 | * 113 | * @dataProvider provide_smart_marks_data 114 | * 115 | * @param string $input HTML input. 116 | * @param string $result Expected result. 117 | */ 118 | public function test_apply_off( $input, $result ) { 119 | $this->s->set_smart_marks( false ); 120 | 121 | $this->assertFixResultSame( $input, $input ); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-space-collapse-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = new Node_Fixes\Space_Collapse_Fix(); 55 | } 56 | 57 | /** 58 | * Provide data for special white space collapsing. 59 | * 60 | * @return array 61 | */ 62 | public function provide_space_collapse_data() { 63 | return [ 64 | [ 'A new hope  arises.', 'A new hope arises.' ], 65 | [ 'A  new hope   arises.', 'A new hope arises.' ], 66 | [ 'Årø Bilsenter', 'Årø Bilsenter' ], 67 | ]; 68 | } 69 | 70 | /** 71 | * Test apply. 72 | * 73 | * @covers ::apply 74 | * 75 | * @dataProvider provide_space_collapse_data 76 | * 77 | * @param string $input HTML input. 78 | * @param string $result Expected result. 79 | */ 80 | public function test_apply( $input, $result ) { 81 | $this->s->set_space_collapse( true ); 82 | 83 | $this->assertFixResultSame( $input, $result ); 84 | } 85 | 86 | /** 87 | * Test apply. 88 | * 89 | * @covers ::apply 90 | * 91 | * @dataProvider provide_space_collapse_data 92 | * 93 | * @param string $input HTML input. 94 | * @param string $result Expected result. 95 | */ 96 | public function test_apply_off( $input, $result ) { 97 | $this->s->set_space_collapse( false ); 98 | 99 | $this->assertFixResultSame( $input, $input ); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-style-ampersands-fix-test.php: -------------------------------------------------------------------------------- 1 | & bar' ], 58 | [ '&', '&' ], 59 | [ 'R&D', 'R&D' ], 60 | ]; 61 | } 62 | 63 | /** 64 | * Test apply. 65 | * 66 | * @covers ::apply 67 | * @covers ::__construct 68 | * 69 | * @uses PHP_Typography\RE::escape_tags 70 | * @uses PHP_Typography\Text_Parser 71 | * @uses PHP_Typography\Text_Parser\Token 72 | * 73 | * @dataProvider provide_style_ampersands_data 74 | * 75 | * @param string $input HTML input. 76 | * @param string $result Expected result. 77 | */ 78 | public function test_apply( $input, $result ) { 79 | $this->fix = new Node_Fixes\Style_Ampersands_Fix( 'amp' ); 80 | $this->s->set_style_ampersands( true ); 81 | 82 | $this->assertFixResultSame( $input, $result ); 83 | } 84 | 85 | /** 86 | * Test apply. 87 | * 88 | * @covers ::apply 89 | * @covers ::__construct 90 | * 91 | * @uses PHP_Typography\RE::escape_tags 92 | * @uses PHP_Typography\Text_Parser 93 | * @uses PHP_Typography\Text_Parser\Token 94 | * 95 | * @dataProvider provide_style_ampersands_data 96 | * 97 | * @param string $input HTML input. 98 | * @param string $result Expected result. 99 | */ 100 | public function test_apply_off( $input, $result ) { 101 | $this->fix = new Node_Fixes\Style_Ampersands_Fix( 'amp' ); 102 | $this->s->set_style_ampersands( false ); 103 | 104 | $this->assertFixResultSame( $input, $input ); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-style-caps-fix-test.php: -------------------------------------------------------------------------------- 1 | BAR bar' ], 58 | [ 'foo BARbaz', 'foo BARbaz' ], 59 | [ 'foo BAR123 baz', 'foo BAR123 baz' ], 60 | [ 'foo 123BAR baz', 'foo 123BAR baz' ], 61 | [ 'during WP-CLI commands', 'during WP-CLI commands' ], 62 | [ 'during WP‐CLI commands', 'during WP‐CLI commands' ], // HYPHEN instead of HYPHEN-MINUS. 63 | [ 'UNESCO-Welterbestätten', 'UNESCO-Welterbestätten' ], 64 | ]; 65 | } 66 | 67 | /** 68 | * Test apply. 69 | * 70 | * @covers ::apply 71 | * @covers ::__construct 72 | * 73 | * @uses PHP_Typography\RE::escape_tags 74 | * 75 | * @dataProvider provide_style_caps_data 76 | * 77 | * @param string $input HTML input. 78 | * @param string $result Expected result. 79 | */ 80 | public function test_apply( $input, $result ) { 81 | $this->fix = new Node_Fixes\Style_Caps_Fix( 'caps' ); 82 | $this->s->set_style_caps( true ); 83 | 84 | $this->assertFixResultSame( $input, $result ); 85 | } 86 | 87 | /** 88 | * Test apply. 89 | * 90 | * @covers ::apply 91 | * @covers ::__construct 92 | * 93 | * @uses PHP_Typography\RE::escape_tags 94 | * 95 | * @dataProvider provide_style_caps_data 96 | * 97 | * @param string $input HTML input. 98 | * @param string $result Expected result. 99 | */ 100 | public function test_apply_off( $input, $result ) { 101 | $this->fix = new Node_Fixes\Style_Caps_Fix( 'caps' ); 102 | $this->s->set_style_caps( false ); 103 | 104 | $this->assertFixResultSame( $input, $input ); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-style-numbers-fix-test.php: -------------------------------------------------------------------------------- 1 | 123 bar' ], 58 | [ 'foo 123bar baz', 'foo 123bar baz' ], 59 | [ 'foo bar123 baz', 'foo bar123 baz' ], 60 | [ 'foo 123BAR baz', 'foo 123BAR baz' ], 61 | ]; 62 | } 63 | 64 | /** 65 | * Test apply. 66 | * 67 | * @covers ::apply 68 | * @covers ::__construct 69 | * 70 | * @uses PHP_Typography\RE::escape_tags 71 | * 72 | * @dataProvider provide_style_numbers_data 73 | * 74 | * @param string $input HTML input. 75 | * @param string $result Expected result. 76 | */ 77 | public function test_apply( $input, $result ) { 78 | $this->fix = new Node_Fixes\Style_Numbers_Fix( 'numbers' ); 79 | $this->s->set_style_numbers( true ); 80 | 81 | $this->assertFixResultSame( $input, $result ); 82 | } 83 | 84 | /** 85 | * Test apply. 86 | * 87 | * @covers ::apply 88 | * @covers ::__construct 89 | * 90 | * @uses PHP_Typography\RE::escape_tags 91 | * 92 | * @dataProvider provide_style_numbers_data 93 | * 94 | * @param string $input HTML input. 95 | * @param string $result Expected result. 96 | */ 97 | public function test_apply_off( $input, $result ) { 98 | $this->fix = new Node_Fixes\Style_Numbers_Fix( 'numbers' ); 99 | $this->s->set_style_numbers( false ); 100 | 101 | $this->assertFixResultSame( $input, $input ); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-unicode-remapping-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = new Node_Fixes\Unicode_Remapping_Fix(); 56 | } 57 | 58 | /** 59 | * Provides data for testing apply. 60 | * 61 | * @return array 62 | */ 63 | public function provide_character_mapping_data() { 64 | return [ 65 | [ 66 | 'foobar', 67 | [ 68 | 'a' => 'A', 69 | 'r' => 'z', 70 | ], 71 | 'foobAz', 72 | ], 73 | [ 74 | '', 75 | [ 76 | 'a' => 'A', 77 | 'r' => 'z', 78 | ], 79 | '', 80 | ], 81 | ]; 82 | } 83 | 84 | /** 85 | * Test apply. 86 | * 87 | * @covers ::apply 88 | * @covers ::__construct 89 | * 90 | * @uses PHP_Typography\Fixes\Node_Fixes\Simple_Regex_Replacement_Fix::apply 91 | * 92 | * @dataProvider provide_character_mapping_data 93 | * 94 | * @param string $input HTML input. 95 | * @param string[] $mapping The character remapping to apply. 96 | * @param string $result Expected result. 97 | */ 98 | public function test_apply( $input, array $mapping, $result ) { 99 | $this->s = new Settings( false, $mapping ); 100 | 101 | $this->assertFixResultSame( $input, $result ); 102 | } 103 | 104 | /** 105 | * Test apply. 106 | * 107 | * @covers ::apply 108 | * @covers ::__construct 109 | * 110 | * @uses PHP_Typography\Fixes\Node_Fixes\Simple_Regex_Replacement_Fix::apply 111 | * 112 | * @dataProvider provide_character_mapping_data 113 | * 114 | * @param string $input HTML input. 115 | */ 116 | public function test_apply_off( $input ) { 117 | $this->s = new Settings( false, [] ); 118 | 119 | $this->assertFixResultSame( $input, $input ); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /tests/fixes/node-fixes/class-unit-spacing-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = new Node_Fixes\Unit_Spacing_Fix(); 56 | } 57 | 58 | /** 59 | * Provide data for testing unit_spacing. 60 | * 61 | * @return array 62 | */ 63 | public function provide_unit_spacing_data() { 64 | return [ 65 | [ 'It was 2 m from', 'It was 2 m from' ], 66 | [ '3 km/h', '3 km/h' ], 67 | [ '5 sg 44 kg', '5 sg 44 kg' ], 68 | [ '100 °C', '100 °C' ], 69 | [ '10 €', '10 €' ], 70 | [ '10 €', '10 €' ], 71 | [ '1 ¢', '1 ¢' ], 72 | [ '1 $', '1 $' ], 73 | [ '5 nanoamperes', '5 nanoamperes' ], 74 | [ '1 Ω', '1 Ω' ], 75 | [ '1 Ω', '1 Ω' ], 76 | [ '10 m2', '10 m2' ], 77 | [ '10 m²', '10 m²' ], 78 | [ '5 m³', '5 m³' ], 79 | ]; 80 | } 81 | 82 | /** 83 | * Test apply. 84 | * 85 | * @covers ::apply 86 | * @covers ::__construct 87 | * 88 | * @uses PHP_Typography\Fixes\Node_Fixes\Simple_Regex_Replacement_Fix::apply 89 | * 90 | * @dataProvider provide_unit_spacing_data 91 | * 92 | * @param string $input HTML input. 93 | * @param string $result Expected result. 94 | */ 95 | public function test_apply( $input, $result ) { 96 | $this->s->set_unit_spacing( true ); 97 | 98 | $this->assertFixResultSame( $input, $result ); 99 | } 100 | 101 | /** 102 | * Test apply. 103 | * 104 | * @covers ::apply 105 | * @covers ::__construct 106 | * 107 | * @uses PHP_Typography\Fixes\Node_Fixes\Simple_Regex_Replacement_Fix::apply 108 | * 109 | * @dataProvider provide_unit_spacing_data 110 | * 111 | * @param string $input HTML input. 112 | * @param string $result Expected result. 113 | */ 114 | public function test_apply_off( $input, $result ) { 115 | $this->s->set_unit_spacing( false ); 116 | 117 | $this->assertFixResultSame( $input, $input ); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /tests/fixes/token-fixes/class-token-fix-testcase.php: -------------------------------------------------------------------------------- 1 | s = new Settings( false ); 65 | } 66 | 67 | /** 68 | * Assert that the output of the fix is the same as the expected result. 69 | * 70 | * @param string $input Text node value. 71 | * @param string $result Expected result. 72 | * @param bool $is_title Indicates if the processed tokens occur in a title/heading context. 73 | * @param \DOMText $textnode The context DOM node. 74 | */ 75 | protected function assertFixResultSame( $input, $result, $is_title, $textnode ) { 76 | $tokens = $this->tokenize_sentence( $input ); 77 | $result_tokens = $this->fix->apply( $tokens, $textnode, $this->s, $is_title ); 78 | $this->assert_tokens_same( $result, $result_tokens ); 79 | } 80 | 81 | /** 82 | * Creates a \DOMText node. 83 | * 84 | * @param string $parent_node The parent element. 85 | * @param string $content The node content. 86 | * 87 | * @return \DOMText 88 | */ 89 | protected function getTextnode( $parent_node, $content ) { 90 | $element = new \DOMElement( $parent_node, $content ); 91 | $this->nodes[] = $element; 92 | 93 | return $element->firstChild; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /tests/fixes/token-fixes/class-wrap-emails-fix-test.php: -------------------------------------------------------------------------------- 1 | fix = new Token_Fixes\Wrap_Emails_Fix(); 57 | } 58 | 59 | /** 60 | * Tests the constructor. 61 | * 62 | * @covers ::__construct 63 | */ 64 | public function test_constructor() { 65 | $fix = new Token_Fixes\Wrap_Emails_Fix( true ); 66 | 67 | $this->assert_attribute_same( Token_Fix::OTHER, 'target', $fix, 'The fixer should be targetting OTHER tokens.' ); 68 | $this->assert_attribute_same( true, 'feed_compatible', $fix, 'The fixer should not be feed_compatible.' ); 69 | } 70 | 71 | 72 | /** 73 | * Provide data for testing wrap_emails. 74 | * 75 | * @return array 76 | */ 77 | public function provide_wrap_emails_data() { 78 | return [ 79 | [ 'code@example.org', 'code@​example.​org' ], 80 | [ 'some.name@sub.domain.org', 'some.​name@​sub.​domain.​org' ], 81 | [ 'funny123@summer1.org', 'funny123@​summer1.​org' ], 82 | ]; 83 | } 84 | 85 | /** 86 | * Test apply. 87 | * 88 | * @covers ::apply 89 | * 90 | * @uses PHP_Typography\Text_Parser 91 | * @uses PHP_Typography\Text_Parser\Token 92 | * 93 | * @dataProvider provide_wrap_emails_data 94 | * 95 | * @param string $input HTML input. 96 | * @param string $result Expected result. 97 | */ 98 | public function test_apply( $input, $result ) { 99 | $this->s->set_wrap_emails( true ); 100 | 101 | $this->assertFixResultSame( $input, $result, false, $this->getTextnode( 'foo', $input ) ); 102 | } 103 | 104 | /** 105 | * Test apply. 106 | * 107 | * @covers ::apply 108 | * 109 | * @uses PHP_Typography\Text_Parser 110 | * @uses PHP_Typography\Text_Parser\Token 111 | * 112 | * @dataProvider provide_wrap_emails_data 113 | * 114 | * @param string $input HTML input. 115 | * @param string $result Expected result. 116 | */ 117 | public function test_apply_off( $input, $result ) { 118 | $this->s->set_wrap_emails( false ); 119 | 120 | $this->assertFixResultSame( $input, $input, false, $this->getTextnode( 'foo', $input ) ); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /tests/hyphenator/class-cache-test.php: -------------------------------------------------------------------------------- 1 | c = new Cache(); 55 | } 56 | 57 | /** 58 | * Tests serialization & the has_changed property. 59 | * 60 | * @covers ::__sleep 61 | * @covers ::has_changed 62 | * 63 | * @uses ::set_hyphenator 64 | * @uses ::get_hyphenator 65 | */ 66 | public function test_has_changed() { 67 | $this->assertFalse( $this->c->has_changed() ); 68 | $this->c->set_hyphenator( 'de', $this->createMock( \PHP_Typography\Hyphenator::class ) ); 69 | $this->assertTrue( $this->c->has_changed() ); 70 | 71 | $new_c = unserialize( serialize( $this->c ) ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.serialize_serialize,WordPress.PHP.DiscouragedPHPFunctions.serialize_unserialize 72 | $this->assertInstanceOf( Cache::class, $new_c ); 73 | $this->assertInstanceOf( \PHP_Typography\Hyphenator::class, $new_c->get_hyphenator( 'de' ) ); 74 | $this->assertFalse( $new_c->has_changed() ); 75 | } 76 | 77 | /** 78 | * Tests set_hyphenator. 79 | * 80 | * @covers ::set_hyphenator 81 | * @covers ::get_hyphenator 82 | * 83 | * @uses PHP_Typography\Hyphenator\Trie_Node 84 | */ 85 | public function test_hyphenator_cache() { 86 | $hyphenator = new \PHP_Typography\Hyphenator( 'en-US', [] ); 87 | 88 | $this->assertSame( null, $this->c->get_hyphenator( 'de' ) ); 89 | $this->c->set_hyphenator( 'de', $hyphenator ); 90 | $this->assertSame( $hyphenator, $this->c->get_hyphenator( 'de' ) ); 91 | $this->assertSame( null, $this->c->get_hyphenator( 'foobar' ) ); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /tests/hyphenator/class-trie-node-test.php: -------------------------------------------------------------------------------- 1 | '00010', 52 | '_abl' => '00030', 53 | '_abo' => '00002', 54 | '_abol' => '000300', 55 | '_abor' => '000100', 56 | '_abs' => '00032', 57 | '_abu' => '00030', 58 | '_aden' => '000030', 59 | ] 60 | ); 61 | 62 | $this->assertInstanceOf( Trie_Node::class, $trie ); 63 | 64 | return $trie; 65 | } 66 | 67 | /** 68 | * Test exists. 69 | * 70 | * @covers ::exists 71 | * @depends test_build_trie 72 | 73 | * @param Trie_Node $trie A trie. 74 | * 75 | * @return Trie_Node 76 | */ 77 | public function test_exists( Trie_Node $trie ) { 78 | $this->assertTrue( $trie->exists( '_' ) ); 79 | $this->assertFalse( $trie->exists( 'foobar' ) ); 80 | 81 | return $trie; 82 | } 83 | 84 | /** 85 | * Test get_node. 86 | * 87 | * @covers ::get_node 88 | * @depends test_build_trie 89 | * 90 | * @param Trie_Node $trie A trie. 91 | * 92 | * @return Trie_Node 93 | */ 94 | public function test_get_node( Trie_Node $trie ) { 95 | $node = $trie->get_node( '_' ); 96 | 97 | $this->assertInstanceOf( Trie_Node::class, $node ); 98 | 99 | return $trie; 100 | } 101 | 102 | /** 103 | * Test get_node. 104 | * 105 | * @covers ::get_node 106 | * @depends test_get_node 107 | * 108 | * @uses ::__construct 109 | * 110 | * @param Trie_Node $trie A trie. 111 | * 112 | * @return Trie_Node 113 | */ 114 | public function test_get_node_new( Trie_Node $trie ) { 115 | $node = $trie->get_node( '*' ); 116 | 117 | $this->assertInstanceOf( Trie_Node::class, $node ); 118 | 119 | return $trie; 120 | } 121 | 122 | /** 123 | * Test offsets. 124 | * 125 | * @covers ::offsets 126 | * @depends test_build_trie 127 | * 128 | * @uses ::get_node 129 | * 130 | * @param Trie_Node $trie A trie. 131 | * 132 | * @return Trie_Node 133 | */ 134 | public function test_offsets( Trie_Node $trie ) { 135 | $node = $trie->get_node( '_' ); 136 | $node = $node->get_node( 'a' ); 137 | $node = $node->get_node( 'b' ); 138 | $node = $node->get_node( 'a' ); 139 | 140 | $this->assertInstanceOf( Trie_Node::class, $node ); 141 | $this->assert_is_array( $node->offsets() ); 142 | $this->assertGreaterThan( 0, count( $node->offsets() ) ); 143 | 144 | return $trie; 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /tests/settings/class-dash-style-test.php: -------------------------------------------------------------------------------- 1 | expect_exception_message_matches( "/^Invalid dash style $style.\$/" ); 74 | } 75 | 76 | $dashes = Dash_Style::get_styled_dashes( $style ); 77 | 78 | if ( is_array( $result ) ) { 79 | $this->assertInstanceOf( Dashes::class, $dashes ); 80 | $this->assertSame( $result[0], $dashes->parenthetical_dash() ); 81 | $this->assertSame( $result[1], $dashes->parenthetical_space() ); 82 | $this->assertSame( $result[2], $dashes->interval_dash() ); 83 | $this->assertSame( $result[3], $dashes->interval_space() ); 84 | } elseif ( $style instanceof Dashes ) { 85 | $this->assertSame( $style, $dashes ); 86 | } else { 87 | $this->assertNull( $dashes, 'get_styled_dashes should return null for invalid indices.' ); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /tests/settings/class-dashes-test.php: -------------------------------------------------------------------------------- 1 | makePartial(); 50 | 51 | $dashes->shouldReceive( 'interval_dash' )->once()->andReturn( 'iv' ); 52 | $dashes->shouldReceive( 'interval_space' )->once()->andReturn( 'is' ); 53 | $dashes->shouldReceive( 'parenthetical_dash' )->once()->andReturn( 'pd' ); 54 | $dashes->shouldReceive( 'parenthetical_space' )->once()->andReturn( 'ps' ); 55 | 56 | $this->assertIsString( \json_encode( $dashes ) ); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /tests/settings/class-quotes-test.php: -------------------------------------------------------------------------------- 1 | makePartial(); 50 | 51 | $dashes->shouldReceive( 'open' )->once()->andReturn( 'o' ); 52 | $dashes->shouldReceive( 'close' )->once()->andReturn( 'c' ); 53 | 54 | $this->assertIsString( \json_encode( $dashes ) ); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /tests/settings/class-simple-dashes-test.php: -------------------------------------------------------------------------------- 1 | assertSame( $pdash, $dashes->parenthetical_dash() ); 73 | $this->assertSame( $pspace, $dashes->parenthetical_space() ); 74 | $this->assertSame( $idash, $dashes->interval_dash() ); 75 | $this->assertSame( $ispace, $dashes->interval_space() ); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /tests/settings/class-simple-quotes-test.php: -------------------------------------------------------------------------------- 1 | assertSame( $open, $quotes->open() ); 69 | $this->assertSame( $close, $quotes->close() ); 70 | } 71 | } 72 | --------------------------------------------------------------------------------