├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── composer.json ├── phpunit.xml.dist ├── src ├── Exception │ ├── ExtensionMissingException.php │ ├── IllegalCharacterException.php │ ├── UndetectableEncodingException.php │ └── UnsupportedEncodingException.php ├── IconvTranscoder.php ├── MbTranscoder.php ├── Transcoder.php └── TranscoderInterface.php └── tests ├── IconvTranscoderTest.php ├── MbTranscoderTest.php └── TranscoderTest.php /.gitignore: -------------------------------------------------------------------------------- 1 | composer.lock 2 | vendor 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | 3 | php: 4 | - 5.4 5 | - 5.5 6 | - 5.6 7 | - hhvm-nightly 8 | 9 | before_script: 10 | - composer install --dev 11 | 12 | script: phpunit --coverage-clover=coverage.clover 13 | 14 | after_script: 15 | - wget https://scrutinizer-ci.com/ocular.phar 16 | - php ocular.phar code-coverage:upload --format=php-clover coverage.clover 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 David de Boer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Transcoder 2 | ========== 3 | 4 | [![Build Status](https://travis-ci.org/ddeboer/transcoder.svg?branch=master)](https://travis-ci.org/ddeboer/transcoder) 5 | [![Release](https://img.shields.io/github/release/ddeboer/transcoder.svg?style=flat)](https://packagist.org/packages/ddeboer/transcoder) 6 | 7 | Introduction 8 | ------------ 9 | 10 | This is a wrapper around PHP’s `mb_convert_encoding` and `iconv` functions. 11 | This library adds: 12 | 13 | * fallback from `mb` to `iconv` for unknown encodings 14 | * conversion of warnings to proper exceptions. 15 | 16 | Installation 17 | ------------ 18 | 19 | The recommended way to install the Transcoder library is through 20 | [Composer](http://getcomposer.org): 21 | 22 | ```bash 23 | $ composer require ddeboer/transcoder 24 | ``` 25 | 26 | This command requires you to have Composer installed globally, as explained 27 | in the [installation chapter](https://getcomposer.org/doc/00-intro.md) 28 | of the Composer documentation. 
29 | 30 | Usage 31 | ----- 32 | 33 | ### Basics 34 | 35 | Create the right transcoder for your platform and translate some strings: 36 | 37 | ```php 38 | use Ddeboer\Transcoder\Transcoder; 39 | 40 | $transcoder = Transcoder::create(); 41 | $result = $transcoder->transcode('España'); 42 | ``` 43 | 44 | You can also manually instantiate a transcoder of your liking: 45 | 46 | ```php 47 | use Ddeboer\Transcoder\MbTranscoder; 48 | 49 | $transcoder = new MbTranscoder(); 50 | 51 | ``` 52 | 53 | Or: 54 | 55 | ```php 56 | use Ddeboer\Transcoder\IconvTranscoder; 57 | 58 | $transcoder = new IconvTranscoder(); 59 | ``` 60 | 61 | ### Source encoding 62 | 63 | By default, the source encoding is detected automatically. However, you get 64 | much more reliable results when you specify it explicitly: 65 | 66 | ```php 67 | $transcoder->transcode('España', 'iso-8859-1'); 68 | ``` 69 | 70 | ### Target encoding 71 | 72 | Specify a default target encoding as the first argument to `create()`: 73 | 74 | 75 | ```php 76 | use Ddeboer\Transcoder\Transcoder; 77 | 78 | $isoTranscoder = Transcoder::create('iso-8859-1'); 79 | ``` 80 | 81 | Alternatively, specify a target encoding as the third argument in a 82 | `transcode()` call: 83 | 84 | ```php 85 | use Ddeboer\Transcoder\Transcoder; 86 | 87 | $transcoder->transcode('España', null, 'UTF-8'); 88 | ``` 89 | 90 | ### Error handling 91 | 92 | PHP’s `mb_convert_encoding` and `iconv` are inconvenient to use because they 93 | generate notices and warnings instead of proper exceptions.
This library fixes 94 | that: 95 | 96 | 97 | ```php 98 | use Ddeboer\Transcoder\Exception\UndetectableEncodingException; 99 | use Ddeboer\Transcoder\Exception\UnsupportedEncodingException; 100 | use Ddeboer\Transcoder\Exception\IllegalCharacterException; 101 | 102 | $input = 'España'; 103 | 104 | try { 105 | $transcoder->transcode($input); 106 | } catch (UndetectableEncodingException $e) { 107 | // Failed to automatically detect $input’s encoding 108 | } 109 | 110 | try { 111 | $transcoder->transcode($input, null, 'not-a-real-encoding'); 112 | } catch (UnsupportedEncodingException $e) { 113 | // ‘not-a-real-encoding’ is an unsupported encoding 114 | } 115 | 116 | try { 117 | $transcoder->transcode('Illegal quotes: ‘ ’', null, 'iso-8859-1'); 118 | } catch (IllegalCharacterException $e) { 119 | // Curly quotes ‘ ’ are illegal in ISO-8859-1 120 | } 121 | ``` 122 | 123 | ### Transcoder fallback 124 | 125 | In general, `mb_convert_encoding` is faster than `iconv`. However, as `iconv` 126 | supports more encodings than `mb_convert_encoding`, it makes sense to combine 127 | the two. 128 | 129 | So, the Transcoder returned from `create()`: 130 | 131 | * uses `mb_convert_encoding` if the 132 | [mbstring](http://php.net/manual/en/book.mbstring.php) PHP extension is 133 | installed; 134 | * if not, it uses `iconv` instead if the 135 | [iconv](http://php.net/manual/en/book.iconv.php) extension is installed; 136 | * if both the mbstring and iconv extensions are available, the Transcoder will 137 | first try `mb_convert_encoding` and fall back to `iconv`.
138 | 139 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ddeboer/transcoder", 3 | "description": "Better encoding conversion for PHP", 4 | "keywords": [ "encoding", "utf-8", "iso", "mb", "iconv", "multibyte", "charset", "mb_convert_encoding" ], 5 | "license": "MIT", 6 | "authors": [ 7 | { 8 | "name": "David de Boer", 9 | "email": "david@ddeboer.nl" 10 | }, 11 | { 12 | "name": "Community contributors", 13 | "homepage": "https://github.com/ddeboer/transcoder/graphs/contributors" 14 | } 15 | ], 16 | "require": { 17 | "php": ">=5.4.0" 18 | }, 19 | "require-dev": { 20 | "phpunit/phpunit": "~4.0" 21 | }, 22 | "suggest": { 23 | "ext-mbstring": "For using the MbTranscoder", 24 | "ext-iconv": "For using the IconvTranscoder" 25 | }, 26 | "autoload": { 27 | "psr-4": { 28 | "Ddeboer\\Transcoder\\": "src/", 29 | "Ddeboer\\Transcoder\\Tests\\": "tests/" 30 | } 31 | }, 32 | "extra": { 33 | "branch-alias": { 34 | "dev-master": "1.0.x-dev" 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | ./tests/ 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/Exception/ExtensionMissingException.php: -------------------------------------------------------------------------------- 1 | defaultEncoding = $defaultEncoding; 20 | } 21 | 22 | /** 23 | * {@inheritdoc} 24 | */ 25 | public function transcode($string, $from = null, $to = null) 26 | { 27 | set_error_handler( 28 | function ($no, $message) use ($string) { 29 | if (1 === preg_match('/Wrong charset, conversion (.+) is/', $message, $matches)) { 30 | throw new UnsupportedEncodingException($matches[1], $message); 31 | } else { 32 | throw new 
IllegalCharacterException($string, $message); 33 | } 34 | }, 35 | E_NOTICE | E_USER_NOTICE 36 | ); 37 | 38 | $result = iconv($from, $to ?: $this->defaultEncoding, $string); 39 | restore_error_handler(); 40 | 41 | return $result; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/MbTranscoder.php: -------------------------------------------------------------------------------- 1 | assertSupported($defaultEncoding); 28 | $this->defaultEncoding = $defaultEncoding; 29 | } 30 | 31 | /** 32 | * {@inheritdoc} 33 | */ 34 | public function transcode($string, $from = null, $to = null) 35 | { 36 | if ($from) { 37 | if (is_array($from)) { 38 | array_map(array($this, 'assertSupported'), $from); 39 | } else { 40 | $this->assertSupported($from); 41 | } 42 | } 43 | 44 | if (!$from || 'auto' === $from) { 45 | set_error_handler( 46 | function ($no, $warning) use ($string) { 47 | throw new UndetectableEncodingException($string, $warning); 48 | }, 49 | E_WARNING 50 | ); 51 | } 52 | 53 | 54 | if ($to) { 55 | $this->assertSupported($to); 56 | } 57 | 58 | $result = mb_convert_encoding( 59 | $string, 60 | $to ?: $this->defaultEncoding, 61 | $from ?: 'auto' 62 | ); 63 | 64 | restore_error_handler(); 65 | 66 | return $result; 67 | } 68 | 69 | private function assertSupported($encoding) 70 | { 71 | if (!$this->isSupported($encoding)) { 72 | throw new UnsupportedEncodingException($encoding); 73 | } 74 | } 75 | 76 | private function isSupported($encoding) 77 | { 78 | return isset(self::$encodings[strtolower($encoding)]); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/Transcoder.php: -------------------------------------------------------------------------------- 1 | transcoders = $transcoders; 20 | } 21 | 22 | /** 23 | * {@inheritdoc} 24 | */ 25 | public function transcode($string, $from = null, $to = null) 26 | { 27 | foreach ($this->transcoders as $transcoder) { 28 | try { 29 | 
return $transcoder->transcode($string, $from, $to); 30 | } catch (UnsupportedEncodingException $e) { 31 | // Ignore as long as the fallback transcoder is all right 32 | } 33 | } 34 | 35 | throw $e; 36 | } 37 | 38 | /** 39 | * Create a transcoder 40 | * 41 | * @param string $defaultEncoding 42 | * 43 | * @return TranscoderInterface 44 | * 45 | * @throws ExtensionMissingException 46 | */ 47 | public static function create($defaultEncoding = 'UTF-8') 48 | { 49 | if (isset(self::$chain[$defaultEncoding])) { 50 | return self::$chain[$defaultEncoding]; 51 | } 52 | 53 | $transcoders = []; 54 | 55 | try { 56 | $transcoders[] = new MbTranscoder($defaultEncoding); 57 | } catch (ExtensionMissingException $mb) { 58 | // Ignore missing mbstring extension; fall back to iconv 59 | } 60 | 61 | try { 62 | $transcoders[] = new IconvTranscoder($defaultEncoding); 63 | } catch (ExtensionMissingException $iconv) { 64 | // Neither mbstring nor iconv 65 | throw $iconv; 66 | } 67 | 68 | self::$chain[$defaultEncoding] = new self($transcoders); 69 | 70 | return self::$chain[$defaultEncoding]; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/TranscoderInterface.php: -------------------------------------------------------------------------------- 1 | transcoder = new IconvTranscoder(); 17 | } 18 | 19 | /** 20 | * @expectedException \Ddeboer\Transcoder\Exception\UnsupportedEncodingException 21 | * @expectedExceptionMessage bad-encoding 22 | */ 23 | public function testTranscodeUnsupportedFromEncoding() 24 | { 25 | $this->transcoder->transcode('bla', 'bad-encoding'); 26 | } 27 | 28 | public function testDetectEncoding() 29 | { 30 | $this->transcoder->transcode('España', null, 'iso-8859-1'); 31 | } 32 | 33 | /** 34 | * @expectedException \Ddeboer\Transcoder\Exception\IllegalCharacterException 35 | */ 36 | public function testTranscodeIllegalCharacter() 37 | { 38 | $this->transcoder->transcode('“', null, 'iso-8859-1'); 39 | } 40 | 41 | /** 42 
| * @dataProvider getStrings 43 | */ 44 | public function testTranscode($string, $encoding) 45 | { 46 | $result = $this->transcoder->transcode($string, null, $encoding); 47 | $this->assertEquals($string, $this->transcoder->transcode($result, $encoding)); 48 | } 49 | 50 | public function getStrings() 51 | { 52 | return [ 53 | ['España', 'iso-8859-1'] 54 | ]; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /tests/MbTranscoderTest.php: -------------------------------------------------------------------------------- 1 | transcoder = new MbTranscoder(); 17 | } 18 | 19 | /** 20 | * @expectedException \Ddeboer\Transcoder\Exception\UnsupportedEncodingException 21 | * @expectedExceptionMessage bad-encoding 22 | */ 23 | public function testTranscodeUnsupportedFromEncoding() 24 | { 25 | $this->transcoder->transcode('bla', 'bad-encoding'); 26 | } 27 | 28 | /** 29 | * @expectedException \Ddeboer\Transcoder\Exception\UnsupportedEncodingException 30 | * @expectedExceptionMessage bad-encoding 31 | */ 32 | public function testTranscodeUnsupportedToEncoding() 33 | { 34 | $this->transcoder->transcode('bla', null, 'bad-encoding'); 35 | } 36 | 37 | public function testDetectEncoding() 38 | { 39 | $result = $this->transcoder->transcode('España', null, 'iso-8859-1'); 40 | $this->transcoder->transcode($result); 41 | } 42 | 43 | /** 44 | * @expectedException \Ddeboer\Transcoder\Exception\UndetectableEncodingException 45 | * @expectedExceptionMessage is undetectable 46 | */ 47 | public function testUndetectableEncoding() 48 | { 49 | $result = $this->transcoder->transcode( 50 | '‘curly quotes make this incompatible with 1252’', 51 | null, 52 | 'windows-1252' 53 | ); 54 | $this->transcoder->transcode($result); 55 | } 56 | 57 | /** 58 | * @dataProvider getStrings 59 | */ 60 | public function testTranscode($string, $encoding) 61 | { 62 | $result = $this->transcoder->transcode($string, null, $encoding); 63 | $this->assertEquals($string, 
$this->transcoder->transcode($result, $encoding)); 64 | } 65 | 66 | public function getStrings() 67 | { 68 | return [ 69 | ['‘España’', 'windows-1252'], 70 | ['España', 'iso-8859-1'] 71 | ]; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /tests/TranscoderTest.php: -------------------------------------------------------------------------------- 1 | transcoder = Transcoder::create(); 17 | } 18 | 19 | /** 20 | * @dataProvider getStrings 21 | */ 22 | public function testTranscode($string, $encoding) 23 | { 24 | $result = $this->transcoder->transcode($string, 'UTF-8', $encoding); 25 | $this->assertEquals($string, $this->transcoder->transcode($result, $encoding)); 26 | } 27 | 28 | public function getStrings() 29 | { 30 | return [ 31 | ['España', 'UTF-8'], 32 | ['bla', 'windows-1257'] // Encoding only supported by iconv 33 | ]; 34 | } 35 | } 36 | --------------------------------------------------------------------------------