├── src └── StringEncoder │ ├── Exceptions │ ├── ContentsFailedException.php │ ├── ConvertNoValueException.php │ └── InvalidEncodingException.php │ ├── Contracts │ ├── DTO │ │ ├── EncodingDTOInterface.php │ │ └── MBStringDTOInterface.php │ ├── Discovery │ │ └── ValidatorDiscoveryInterface.php │ ├── EncoderInterface.php │ ├── ConvertReadInterface.php │ ├── OptionsInterface.php │ ├── ConvertWriteInterface.php │ ├── MB │ │ └── RegexInterface.php │ └── ProxyEncoderInterface.php │ ├── Discovery │ └── ValidatorDiscovery.php │ ├── MB │ ├── UTF8 │ │ └── Bom.php │ ├── Regex.php │ ├── Validator.php │ └── Convert.php │ ├── Options.php │ ├── DTO │ ├── EncodingDTO.php │ └── MBStringDTO.php │ ├── Encoder.php │ └── Proxy │ └── Encoder.php ├── SECURITY.md ├── .gitattributes ├── .github └── FUNDING.yml └── composer.json /src/StringEncoder/Exceptions/ContentsFailedException.php: -------------------------------------------------------------------------------- 1 | getString(); 21 | // remove utf-8 BOM 22 | if (\substr($value, 0, 3) == "\xef\xbb\xbf") { 23 | $value = \substr($value, 3); 24 | } 25 | if (\substr($value, -3, 3) == "\xef\xbb\xbf") { 26 | $value = \substr($value, 0, -3); 27 | } 28 | 29 | return MBStringDTO::makeFromDTO($value, $MBStringDTO); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "paquettg/string-encode", 3 | "type": "library", 4 | "description": "Facilitating the process of altering string encoding in PHP.", 5 | "keywords": ["encoding", "charset", "string"], 6 | "homepage": "https://github.com/paquettg/string-encoder", 7 | "license": "MIT", 8 | "authors": [ 9 | { 10 | "name": "Gilles Paquette", 11 | "email": "paquettg@gmail.com", 12 | "homepage": "http://gillespaquette.ca" 13 | } 14 | ], 15 | "require": { 16 | "php": ">=7.2", 17 | "ext-mbstring": "*" 18 | }, 19 | "require-dev": { 20 | 
"phpunit/phpunit": "^8.4", 21 | "mockery/mockery": "^1.3", 22 | "phan/phan": "^3.2", 23 | "friendsofphp/php-cs-fixer": "^2.16", 24 | "php-coveralls/php-coveralls": "^2.4.2" 25 | }, 26 | "autoload": { 27 | "psr-4": { 28 | "StringEncoder\\": "src/StringEncoder" 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/StringEncoder/Contracts/ProxyEncoderInterface.php: -------------------------------------------------------------------------------- 1 | defaultTargetEncoding = EncodingDTO::makeFromString($defaultTargetEncoding, null, $this); 34 | 35 | return $this; 36 | } 37 | 38 | /** 39 | * @throws Exceptions\InvalidEncodingException 40 | */ 41 | public function getDefaultTargetEncoding(): EncodingDTOInterface 42 | { 43 | if ($this->defaultTargetEncoding === null) { 44 | $this->defaultTargetEncoding = EncodingDTO::makeFromString('UTF-8'); 45 | } 46 | 47 | return $this->defaultTargetEncoding; 48 | } 49 | 50 | public function setRemoveUTF8BOM(bool $remove): OptionsInterface 51 | { 52 | $this->removeUTF8BOM = $remove; 53 | 54 | return $this; 55 | } 56 | 57 | public function isRemoveUTF8BOM(): bool 58 | { 59 | return $this->removeUTF8BOM; 60 | } 61 | 62 | public function setCaseSensitiveEncoding(bool $caseSensitive): OptionsInterface 63 | { 64 | $this->caseSensitiveEncoding = $caseSensitive; 65 | 66 | return $this; 67 | } 68 | 69 | public function isCaseSensitiveEncoding(): bool 70 | { 71 | return $this->caseSensitiveEncoding; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/StringEncoder/DTO/EncodingDTO.php: -------------------------------------------------------------------------------- 1 | validateEncoding($encoding, $options->isCaseSensitiveEncoding())) { 36 | throw new InvalidEncodingException('Encoding "' . $encoding . 
'" is not supported by this system.'); 37 | } 38 | if (!$options->isCaseSensitiveEncoding()) { 39 | // we need to potentially fix the encoding string provided with the correct case. 40 | $encoding = $validator->determineEncoding($encoding, $options->isCaseSensitiveEncoding()); 41 | } 42 | 43 | $this->encoding = $encoding; 44 | } 45 | 46 | /** 47 | * Builds an EncodingDTO for $encoding; $validator and $options are optional and are handed to the constructor as given. 48 | * @throws InvalidEncodingException 49 | * @internal 50 | */ 51 | public static function makeFromString(string $encoding, ?Validator $validator = null, ?OptionsInterface $options = null): EncodingDTO 52 | { 53 | return new EncodingDTO($encoding, $validator, $options); 54 | } 55 | 56 | public function getEncoding(): string 57 | { 58 | return $this->encoding; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/StringEncoder/MB/Regex.php: -------------------------------------------------------------------------------- 1 | getEncoding()); 22 | } 23 | 24 | public function getEncoding(): string 25 | { 26 | return \mb_regex_encoding(); 27 | } 28 | 29 | /** 30 | * @throws InvalidEncodingException 31 | */ 32 | public function replace( 33 | string $pattern, 34 | string $replace, 35 | MBStringDTOInterface $MBStringDTO, 36 | bool $ignoreCase = false 37 | ): MBStringDTOInterface { 38 | if ($ignoreCase) { 39 | $value = \mb_eregi_replace($pattern, $replace, $MBStringDTO->getString()); 40 | } else { 41 | $value = \mb_ereg_replace($pattern, $replace, $MBStringDTO->getString()); 42 | } 43 | 44 | return MBStringDTO::makeFromDTO($value, $MBStringDTO); 45 | } 46 | 47 | /** 48 | * @param string[] $patterns 49 | * @param string[] $replaces 50 | * 51 | * @throws InvalidEncodingException 52 | */ 53 | public function replaceMultiple( 54 | array $patterns, 55 | array $replaces, 56 | MBStringDTOInterface $MBStringDTO, 57 | bool $ignoreCase = false 58 | ): MBStringDTOInterface { 59 | foreach ($patterns as $key => $pattern) { 60 | $replace = $replaces[$key]; 61 | $MBStringDTO = 
$this->replace($pattern, $replace, $MBStringDTO, $ignoreCase); 62 | } 63 | 64 | return $MBStringDTO; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/StringEncoder/Encoder.php: -------------------------------------------------------------------------------- 1 | options = $options; 34 | 35 | return $this; 36 | } 37 | 38 | public function getTargetEncoding(): ?string 39 | { 40 | if ($this->targetEncoding === null) { 41 | return null; 42 | } 43 | 44 | return $this->targetEncoding->getEncoding(); 45 | } 46 | 47 | /** 48 | * @throws InvalidEncodingException 49 | */ 50 | public function setTargetEncoding(string $encoding): void 51 | { 52 | $this->targetEncoding = EncodingDTO::makeFromString($encoding, null, $this->options); 53 | } 54 | 55 | public function getSourceEncoding(): ?string 56 | { 57 | if ($this->sourceEncoding === null) { 58 | return null; 59 | } 60 | 61 | return $this->sourceEncoding->getEncoding(); 62 | } 63 | 64 | /** 65 | * @throws InvalidEncodingException 66 | */ 67 | public function setSourceEncoding(string $encoding): void 68 | { 69 | $this->sourceEncoding = EncodingDTO::makeFromString($encoding, null, $this->options); 70 | } 71 | 72 | public function convert(): ConvertReadInterface 73 | { 74 | return new Convert($this->sourceEncoding, $this->targetEncoding, $this->options); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/StringEncoder/MB/Validator.php: -------------------------------------------------------------------------------- 1 | determineEncoding($encoding, $caseSensitive) !== null; 17 | } 18 | 19 | /** 20 | * @internal 21 | */ 22 | public function determineEncoding(string $encoding, bool $caseSensitive): ?string 23 | { 24 | $encodingList = \mb_list_encodings(); 25 | 26 | foreach ($encodingList as $validEncoding) { 27 | if ($validEncoding === $encoding || ( 28 | $caseSensitive === false && 29 | \mb_convert_case($validEncoding, 
MB_CASE_LOWER) === \mb_convert_case($encoding, MB_CASE_LOWER)) 30 | ) { 31 | return $validEncoding; 32 | } 33 | if ($this->validateEncodingAlias($encoding, $validEncoding, $caseSensitive)) { 34 | return $validEncoding; 35 | } 36 | } 37 | 38 | // no valid encoding string found that matches 39 | return null; 40 | } 41 | 42 | public function validateString(string $string, EncodingDTOInterface $encodingDTO): bool 43 | { 44 | // Strict detection: yields false instead of the closest match when $string is not valid in this encoding. 45 | $encoding = \mb_detect_encoding($string, [$encodingDTO->getEncoding()], true); 46 | return $encoding === $encodingDTO->getEncoding(); 47 | } 48 | 49 | /** 50 | * @internal 51 | */ 52 | private function validateEncodingAlias(string $encoding, string $validEncoding, bool $caseSensitive): bool 53 | { 54 | if ($caseSensitive) { 55 | $aliasEncoding = \mb_encoding_aliases($validEncoding); 56 | } else { 57 | $encoding = \mb_convert_case($encoding, MB_CASE_LOWER); 58 | $aliasEncoding = $this->lowerCaseArray(\mb_encoding_aliases($validEncoding)); 59 | } 60 | // Strict match: numeric-looking names must be compared as strings, never as numbers. 61 | return \in_array($encoding, $aliasEncoding, true); 62 | } 63 | 64 | /** 65 | * @internal 66 | * 67 | * @param string[] $encodings 68 | * 69 | * @return string[] 70 | */ 71 | private function lowerCaseArray(array $encodings): array 72 | { 73 | $newEncodings = []; 74 | foreach ($encodings as $key => $encoding) { 75 | $newEncodings[$key] = \mb_convert_case($encoding, MB_CASE_LOWER); 76 | } 77 | 78 | return $newEncodings; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/StringEncoder/Proxy/Encoder.php: -------------------------------------------------------------------------------- 1 | setTargetEncoding($targetEncoding); 28 | if ($sourceEncoding !== null) { 29 | $encoder->setSourceEncoding($sourceEncoding); 30 | } 31 | self::mount('Encoder', $encoder); 32 | } 33 | 34 | /** 35 | * Call this to mount the static facade. The facade allows you to use 36 | * this object as a $className. 
37 | */ 38 | public static function mount(string $className = 'Encoder', ?EncoderImplementation $encoder = null): bool 39 | { 40 | if (!\class_exists($className)) { 41 | \class_alias(__CLASS__, $className); 42 | } 43 | if ($encoder instanceof EncoderImplementation) { 44 | self::$encoder = $encoder; 45 | } 46 | 47 | return true; 48 | } 49 | 50 | public static function getMountedEncoder(): ?EncoderImplementation 51 | { 52 | return self::$encoder; 53 | } 54 | 55 | public static function unload(): void 56 | { 57 | self::$encoder = null; 58 | } 59 | 60 | public static function setOptions(OptionsInterface $options): EncoderInterface 61 | { 62 | return self::$encoder->setOptions($options); 63 | } 64 | 65 | public static function getTargetEncoding(): ?string 66 | { 67 | return self::$encoder->getTargetEncoding(); 68 | } 69 | 70 | public static function setTargetEncoding(string $encoding): void 71 | { 72 | self::$encoder->setTargetEncoding($encoding); 73 | } 74 | 75 | public static function getSourceEncoding(): ?string 76 | { 77 | return self::$encoder->getSourceEncoding(); 78 | } 79 | 80 | public static function setSourceEncoding(string $encoding): void 81 | { 82 | self::$encoder->setSourceEncoding($encoding); 83 | } 84 | 85 | public static function convert(): ConvertReadInterface 86 | { 87 | return self::$encoder->convert(); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/StringEncoder/DTO/MBStringDTO.php: -------------------------------------------------------------------------------- 1 | getDefaultTargetEncoding(); 46 | } 47 | 48 | if (!$validator->validateString($string, $encodingDTO)) { 49 | throw new InvalidEncodingException('String "' . $string . '" is not the current encoding of "' . $encodingDTO->getEncoding() . 
'".'); 50 | } 51 | 52 | $this->options = $options; 53 | $this->encodingDTO = $encodingDTO; 54 | $this->string = $string; 55 | } 56 | 57 | /** 58 | * @throws InvalidEncodingException 59 | * 60 | * @internal 61 | */ 62 | public static function makeFromString( 63 | string $string, 64 | OptionsInterface $options, 65 | ?EncodingDTOInterface $encodingDTO = null, 66 | ?Validator $validator = null 67 | ): MBStringDTOInterface { 68 | return new MBStringDTO($string, $options, $encodingDTO, $validator); 69 | } 70 | 71 | /** 72 | * @throws InvalidEncodingException 73 | * 74 | * @internal 75 | */ 76 | public static function makeFromDTO(string $string, MBStringDTOInterface $MBStringDTO): MBStringDTOInterface 77 | { 78 | return new MBStringDTO($string, $MBStringDTO->getOptions(), $MBStringDTO->getEncodingDTO()); 79 | } 80 | 81 | public function getString(): string 82 | { 83 | return $this->string; 84 | } 85 | 86 | public function getEncodingDTO(): EncodingDTOInterface 87 | { 88 | return $this->encodingDTO; 89 | } 90 | 91 | public function getOptions(): OptionsInterface 92 | { 93 | return $this->options; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/StringEncoder/MB/Convert.php: -------------------------------------------------------------------------------- 1 | options = $options; 53 | 54 | if ($targetEncoding === null) { 55 | // apply default target encoding 56 | $targetEncoding = $this->options->getDefaultTargetEncoding(); 57 | } 58 | 59 | $this->sourceEncoding = $sourceEncoding; 60 | $this->targetEncoding = $targetEncoding; 61 | } 62 | 63 | /** 64 | * @throws InvalidEncodingException 65 | */ 66 | public function fromString(string $value): ConvertWriteInterface 67 | { 68 | $this->convert($value); 69 | 70 | return $this; 71 | } 72 | 73 | /** 74 | * @throws ConvertNoValueException 75 | */ 76 | public function toString(): string 77 | { 78 | if ($this->mbStringDTO === null) { 79 | throw new ConvertNoValueException('No value 
set for call to convert to string.'); 80 | } 81 | 82 | return $this->mbStringDTO->getString(); 83 | } 84 | 85 | /** 86 | * @throws ContentsFailedException 87 | * @throws InvalidEncodingException 88 | */ 89 | public function fromFile(string $filePath): ConvertWriteInterface 90 | { 91 | $content = @\file_get_contents($filePath); 92 | if ($content === false) { 93 | throw new ContentsFailedException('file_get_contents failed and returned false when trying to read "' . $filePath . '".'); 94 | } 95 | 96 | $this->convert($content); 97 | 98 | return $this; 99 | } 100 | 101 | /** 102 | * @throws ContentsFailedException 103 | * @throws ConvertNoValueException 104 | */ 105 | public function toFile(string $filePath): void 106 | { 107 | if ($this->mbStringDTO === null) { 108 | throw new ConvertNoValueException('No value set for call to convert to string.'); 109 | } 110 | 111 | $string = $this->mbStringDTO->getString(); 112 | $status = @\file_put_contents($filePath, $string); 113 | if ($status === false) { 114 | throw new ContentsFailedException('file_put_contents failed and returned false when trying to write "' . $filePath . 
'".'); 115 | } 116 | } 117 | 118 | /** 119 | * @throws ConvertNoValueException 120 | */ 121 | public function toDTO(): MBStringDTOInterface 122 | { 123 | if ($this->mbStringDTO === null) { 124 | throw new ConvertNoValueException('No value set for call to convert to string.'); 125 | } 126 | 127 | return $this->mbStringDTO; 128 | } 129 | 130 | /** 131 | * @throws InvalidEncodingException 132 | */ 133 | private function convert(string $value): void 134 | { 135 | if ($this->sourceEncoding === null) { 136 | $value = \mb_convert_encoding($value, $this->targetEncoding->getEncoding()); 137 | } elseif ($this->sourceEncoding->getEncoding() !== $this->targetEncoding->getEncoding()) { 138 | $value = \mb_convert_encoding($value, $this->targetEncoding->getEncoding(), $this->sourceEncoding->getEncoding()); 139 | } 140 | 141 | $this->mbStringDTO = MBStringDTO::makeFromString($value, $this->options, $this->targetEncoding); 142 | 143 | if ($this->options->isRemoveUTF8BOM() && 144 | $this->targetEncoding->getEncoding() === 'UTF-8') { 145 | $bom = new Bom(); 146 | $this->mbStringDTO = $bom->removeBOM($this->mbStringDTO); 147 | } 148 | } 149 | } 150 | --------------------------------------------------------------------------------