├── LICENSE ├── composer.json └── src ├── Exception.php ├── MissingIdnSupport.php ├── Parser.php ├── functions.php └── functions_include.php /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 ignace nyamagana butera 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
{
    "name": "league/uri-parser",
    "type": "library",
    "description" : "userland URI parser RFC 3986 compliant",
    "keywords": [
        "url",
        "uri",
        "rfc3986",
        "rfc3987",
        "parse_url",
        "parser"
    ],
    "license": "MIT",
    "homepage": "https://github.com/thephpleague/uri-parser",
    "authors": [
        {
            "name" : "Ignace Nyamagana Butera",
            "email" : "nyamsprod@gmail.com",
            "homepage" : "https://nyamsprod.com"
        }
    ],
    "require": {
        "php" : ">=7.0.0"
    },
    "require-dev": {
        "friendsofphp/php-cs-fixer": "^2.0",
        "phpunit/phpunit" : "^6.0",
        "phpstan/phpstan": "^0.9.2",
        "phpstan/phpstan-strict-rules": "^0.9.0",
        "phpstan/phpstan-phpunit": "^0.9.4"
    },
    "autoload": {
        "psr-4": {
            "League\\Uri\\": "src"
        },
        "files": ["src/functions_include.php"]
    },
    "autoload-dev": {
        "psr-4": {
            "LeagueTest\\Uri\\Parser\\": "tests"
        }
    },
    "suggest": {
        "ext-intl" : "Allow parsing RFC3987 compliant hosts",
        "league/uri-schemes": "Allow validating and normalizing URI parsing results"
    },
    "scripts": {
        "phpcs": "php-cs-fixer fix -v --diff --dry-run --allow-risky=yes --ansi",
        "phpstan-src": "phpstan analyse -l max -c phpstan.src.neon src --ansi",
        "phpstan-tests": "phpstan analyse -l max -c phpstan.tests.neon tests --ansi",
        "phpstan": [
            "@phpstan-src",
            "@phpstan-tests"
        ],
        "phpunit": "phpunit --coverage-text",
        "test": [
            "@phpcs",
            "@phpstan",
            "@phpunit"
        ]
    },
    "scripts-descriptions": {
        "phpcs": "Runs coding style test suite",
        "phpstan": "Runs complete codebase static analysis",
        "phpstan-src": "Runs source code static analysis",
        "phpstan-tests": "Runs test suite static analysis",
        "phpunit": "Runs unit and functional testing",
        "test": "Runs full test suite"
    },
    "extra": {
        "branch-alias": {
            "dev-master": "1.x-dev"
        }
    },
    "config": {
        "sort-packages": true
    }
}

--------------------------------------------------------------------------------
/src/Exception.php:
--------------------------------------------------------------------------------
<?php

/**
 * @license https://github.com/thephpleague/uri-parser/blob/master/LICENSE (MIT License)
 * @version 1.4.1
 * @link    https://uri.thephpleague.com/parser/
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

declare(strict_types=1);

namespace League\Uri;

use InvalidArgumentException;

/**
 * An exception thrown on parse attempts of invalid URIs.
 *
 * Every named constructor only formats a message; no validation happens here.
 *
 * @see     https://tools.ietf.org/html/rfc3986
 * @package League\Uri
 * @author  Ignace Nyamagana Butera
 * @since   0.2.0
 */
class Exception extends InvalidArgumentException
{
    /**
     * Returns a new instance for a URI containing invalid characters.
     *
     * @param string $uri the URI as submitted by the caller
     *
     * @return static
     */
    public static function createFromInvalidCharacters(string $uri)
    {
        return new static(sprintf('The submitted uri `%s` contains invalid characters', $uri));
    }

    /**
     * Returns a new instance for a URI whose scheme component is invalid.
     *
     * @param string $uri the URI as submitted by the caller
     *
     * @return static
     */
    public static function createFromInvalidScheme(string $uri)
    {
        return new static(sprintf('The submitted uri `%s` contains an invalid scheme', $uri));
    }

    /**
     * Returns a new instance for a host component that failed validation.
     *
     * @param string $host the rejected host component
     *
     * @return static
     */
    public static function createFromInvalidHost(string $host)
    {
        return new static(sprintf('The submitted host `%s` is invalid', $host));
    }

    /**
     * Returns a new instance for a hostname (host and optional port) that failed validation.
     *
     * @param string $hostname the rejected hostname
     *
     * @return static
     */
    public static function createFromInvalidHostname(string $hostname)
    {
        return new static(sprintf('The submitted hostname `%s` is invalid', $hostname));
    }

    /**
     * Returns a new instance for a port component that failed validation.
     *
     * @param string|int $port the rejected port
     *
     * @return static
     */
    public static function createFromInvalidPort($port)
    {
        return new static(sprintf('The submitted port `%s` is invalid', $port));
    }

    /**
     * Returns a new instance for a URI whose path component is invalid.
     *
     * @param string $uri the URI as submitted by the caller
     *
     * @return static
     */
    public static function createFromInvalidPath(string $uri)
    {
        return new static(sprintf('The submitted uri `%s` contains an invalid path', $uri));
    }
}

--------------------------------------------------------------------------------
/src/MissingIdnSupport.php:
--------------------------------------------------------------------------------
<?php

/**
 * @license https://github.com/thephpleague/uri-parser/blob/master/LICENSE (MIT License)
 * @version 1.4.1
 * @link    https://uri.thephpleague.com/parser/
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

declare(strict_types=1);

namespace League\Uri;

/**
 * An exception thrown if the IDN support is missing or
 * the ICU is not at least version 4.6.
 *
 * @see     https://tools.ietf.org/html/rfc3986
 * @package League\Uri
 * @author  Ignace Nyamagana Butera
 * @since   1.4.0
 */
class MissingIdnSupport extends Exception
{
}

--------------------------------------------------------------------------------
/src/Parser.php:
--------------------------------------------------------------------------------
<?php

/**
 * @license https://github.com/thephpleague/uri-parser/blob/master/LICENSE (MIT License)
 * @version 1.4.1
 * @link    https://uri.thephpleague.com/parser/
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

declare(strict_types=1);

namespace League\Uri;

use UnexpectedValueException;

/**
 * A class to parse a URI string according to RFC3986.
 *
 * @see     https://tools.ietf.org/html/rfc3986
 * @package League\Uri
 * @author  Ignace Nyamagana Butera
 * @since   0.1.0
 */
class Parser
{
    /** @deprecated 1.4.0 will be removed in the next major point release */
    const INVALID_URI_CHARS = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F";

    /** @deprecated 1.4.0 will be removed in the next major point release */
    const SCHEME_VALID_STARTING_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

    /** @deprecated 1.4.0 will be removed in the next major point release */
    const SCHEME_VALID_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+.-';

    /** @deprecated 1.4.0 will be removed in the next major point release */
    const LABEL_VALID_STARTING_CHARS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

    /** @deprecated 1.4.0 will be removed in the next major point release */
    const LOCAL_LINK_PREFIX = '1111111010';

const URI_COMPONENTS = [
        'scheme' => null, 'user' => null, 'pass' => null, 'host' => null,
        'port' => null, 'path' => '', 'query' => null, 'fragment' => null,
    ];

    /** @deprecated 1.4.0 will be removed in the next major point release */
    const SUB_DELIMITERS = '!$&\'()*+,;=';

    /**
     * Returns whether a scheme is valid.
     *
     * The empty string is accepted; otherwise the scheme must start with a
     * letter followed by letters, digits, "+", "." or "-".
     *
     * @see https://tools.ietf.org/html/rfc3986#section-3.1
     */
    public function isScheme(string $scheme): bool
    {
        static $pattern = '/^[a-z][a-z0-9\+\.\-]*$/i';

        return '' === $scheme || 1 === preg_match($pattern, $scheme);
    }

    /**
     * Returns whether a hostname is valid.
     *
     * The empty string is accepted; otherwise the host must be a bracketed
     * IP literal or a registered name.
     *
     * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
     *
     * @throws MissingIdnSupport see Parser::isRegisteredName
     */
    public function isHost(string $host): bool
    {
        return '' === $host
            || $this->isIpHost($host)
            || $this->isRegisteredName($host);
    }

    /**
     * Validate a IPv6/IPvfuture host.
     *
     * The host MUST be enclosed in square brackets. Three forms are accepted:
     * a plain IPv6 address, an IPvFuture literal whose version is neither
     * "4" nor "6", and an IPv6 address followed by a "%"-introduced zone
     * identifier.
     *
     * @see http://tools.ietf.org/html/rfc3986#section-3.2.2
     * @see http://tools.ietf.org/html/rfc6874#section-2
     * @see http://tools.ietf.org/html/rfc6874#section-4
     */
    private function isIpHost(string $host): bool
    {
        if ('[' !== ($host[0] ?? '') || ']' !== substr($host, -1)) {
            return false;
        }

        $ip = substr($host, 1, -1);
        if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
            return true;
        }

        // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
        // The quantifier sits inside the version group so that the whole
        // version is captured and not only its last digit.
        static $ip_future = '/^
            v(?<version>[A-F0-9]+)\.
            (?:
                (?<unreserved>[a-z0-9_~\-\.])|
                (?<sub_delims>[!$&\'()*+,;=:])  # also include the : character
            )+
        $/ix';
        if (1 === preg_match($ip_future, $ip, $matches) && !in_array($matches['version'], ['4', '6'], true)) {
            return true;
        }

        // From here on only an IPv6 address with a zone identifier can be valid.
        if (false === ($pos = strpos($ip, '%'))) {
            return false;
        }

        static $gen_delims = '/[:\/?#\[\]@ ]/'; // Also includes space.
        if (1 === preg_match($gen_delims, rawurldecode(substr($ip, $pos)))) {
            return false;
        }

        $ip = substr($ip, 0, $pos);
        if (!filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
            return false;
        }

        //Only link-local addresses can have a Zone ID attached to them.
        //The packed address must start with the two bytes 0xfe 0x80.
        static $address_block = "\xfe\x80";

        return 0 === strpos((string) inet_pton($ip), $address_block);
    }

    /**
     * Returns whether the host is an IPv4 or a registered name.
     *
     * An ASCII host is checked against the RFC3986 reg-name grammar. A host
     * containing bytes outside the \x20-\x7f range is submitted to
     * idn_to_ascii and accepted when the conversion reports no error.
     *
     * @see http://tools.ietf.org/html/rfc3986#section-3.2.2
     *
     * @throws MissingIdnSupport        if the registered name contains non-ASCII characters
     *                                  and IDN support or ICU requirement are not available or met.
     * @throws UnexpectedValueException if the conversion fails without reporting any error.
     */
    protected function isRegisteredName(string $host): bool
    {
        // Note that unreserved is purposely missing . as it is used to separate labels.
        static $reg_name = '/(?(DEFINE)
                (?<unreserved>[a-z0-9_~\-])
                (?<sub_delims>[!$&\'()*+,;=])
                (?<encoded>%[A-F0-9]{2})
                (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*)
            )
            ^(?:(?&reg_name)\.)*(?&reg_name)\.?$/ix';
        if (1 === preg_match($reg_name, $host)) {
            return true;
        }

        //to test IDN host non-ascii characters must be present in the host
        static $idn_pattern = '/[^\x20-\x7f]/';
        if (1 !== preg_match($idn_pattern, $host)) {
            return false;
        }

        static $idn_support = null;
        $idn_support = $idn_support ?? function_exists('idn_to_ascii') && defined('INTL_IDNA_VARIANT_UTS46');

        // @codeCoverageIgnoreStart
        // added because it is not possible in travis to disabled the ext/intl extension
        // see travis issue https://github.com/travis-ci/travis-ci/issues/4701
        if (!$idn_support) {
            throw new MissingIdnSupport(sprintf('the host `%s` could not be processed for IDN. Verify that ext/intl is installed for IDN support and that ICU is at least version 4.6.', $host));
        }
        // @codeCoverageIgnoreEnd

        $ascii_host = idn_to_ascii($host, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46, $arr);

        // The info array may be left unpopulated on failure; a missing
        // error bitset is treated as "not valid" instead of raising a notice.
        $errors = $arr['errors'] ?? null;

        // @codeCoverageIgnoreStart
        if (false === $ascii_host && 0 === $errors) {
            throw new UnexpectedValueException(sprintf('The Intl extension is misconfigured for %s, please correct this issue before proceeding.', PHP_OS));
        }
        // @codeCoverageIgnoreEnd

        return 0 === $errors;
    }

    /**
     * Returns whether a port is valid.
     *
     * null and the empty string are accepted; any other value must be
     * made of ASCII digits only once cast to a string.
     *
     * @see https://tools.ietf.org/html/rfc3986#section-3.2.3
     *
     * @param mixed $port
     */
    public function isPort($port): bool
    {
        static $pattern = '/^[0-9]+$/';

        if (null === $port || '' === $port) {
            return true;
        }

        return 1 === preg_match($pattern, (string) $port);
    }

    /**
     * Parse a URI string into its components.
     *
     * @see Parser::parse
     *
     * @throws Exception if the URI contains invalid characters
     */
    public function __invoke(string $uri): array
    {
        return $this->parse($uri);
    }

    /**
     * Parse an URI string into its components.
     *
     * This method parses a URI and returns an associative array containing any
     * of the various components of the URI that are present.
     *
     * <code>
     * $components = (new Parser())->parse('http://foo@test.example.com:42?query#');
     * var_export($components);
     * //will display
     * array(
     *     'scheme' => 'http',           // the URI scheme component
     *     'user' => 'foo',              // the URI user component
     *     'pass' => null,               // the URI pass component
     *     'host' => 'test.example.com', // the URI host component
     *     'port' => 42,                 // the URI port component
     *     'path' => '',                 // the URI path component
     *     'query' => 'query',           // the URI query component
     *     'fragment' => '',             // the URI fragment component
     * );
     * </code>
     *
     * The returned array is similar to PHP's parse_url return value with the following
     * differences:
     *
     * <ul>
     * <li>All components are always present in the returned array</li>
     * <li>Empty and undefined components are treated differently. An empty component is
     * set to the empty string while an undefined component is set to the `null` value.</li>
     * <li>The path component is never undefined</li>
     * <li>The method parses the URI following the RFC3986 rules but you are still
     * required to validate the returned components against its related scheme specific rules.</li>
     * </ul>
     *
     * @see https://tools.ietf.org/html/rfc3986
     * @see https://tools.ietf.org/html/rfc3986#section-2
     *
     * @throws Exception if the URI contains invalid characters
     */
    public function parse(string $uri): array
    {
        static $pattern = '/[\x00-\x1f\x7f]/';

        //simple URI which do not need any parsing
        static $simple_uri = [
            '' => [],
            '#' => ['fragment' => ''],
            '?' => ['query' => ''],
            '?#' => ['query' => '', 'fragment' => ''],
            '/' => ['path' => '/'],
            '//' => ['host' => ''],
        ];

        if (isset($simple_uri[$uri])) {
            return array_merge(self::URI_COMPONENTS, $simple_uri[$uri]);
        }

        if (1 === preg_match($pattern, $uri)) {
            throw Exception::createFromInvalidCharacters($uri);
        }

        //if the first character is a known URI delimiter parsing can be simplified
        //(the empty string was already handled by the $simple_uri lookup)
        $first_char = $uri[0];

        //The URI is made of the fragment only
        if ('#' === $first_char) {
            $components = self::URI_COMPONENTS;
            $components['fragment'] = (string) substr($uri, 1);

            return $components;
        }

        //The URI is made of the query and fragment
        if ('?' === $first_char) {
            $components = self::URI_COMPONENTS;
            list($components['query'], $components['fragment']) = explode('#', substr($uri, 1), 2) + [1 => null];

            return $components;
        }

        //The URI does not contain any scheme part
        if (0 === strpos($uri, '//')) {
            return $this->parseSchemeSpecificPart($uri);
        }

        //The URI is made of a path, query and fragment
        if ('/' === $first_char || false === strpos($uri, ':')) {
            return $this->parsePathQueryAndFragment($uri);
        }

        //Fallback parser
        return $this->fallbackParser($uri);
    }

    /**
     * Extract components from a URI without a scheme part.
     *
     * The URI MUST start with the authority component
     * preceded by its delimiter the double slash ('//')
     *
     * Example: //user:pass@host:42/path?query#fragment
     *
     * The authority MUST adhere to the RFC3986 requirements.
     *
     * If the URI contains a path component, it MUST be empty or absolute
     * according to RFC3986 path classification.
     *
     * This method returns an associative array containing all URI components.
     *
     * @see https://tools.ietf.org/html/rfc3986#section-3.2
     * @see https://tools.ietf.org/html/rfc3986#section-3.3
     *
     * @throws Exception If any component of the URI is invalid
     */
    protected function parseSchemeSpecificPart(string $uri): array
    {
        //We remove the authority delimiter
        $remaining_uri = (string) substr($uri, 2);
        $components = self::URI_COMPONENTS;

        //Parsing is done from the right upmost part to the left
        //1 - detect fragment, query and path part if any
        list($remaining_uri, $components['fragment']) = explode('#', $remaining_uri, 2) + [1 => null];
        list($remaining_uri, $components['query']) = explode('?', $remaining_uri, 2) + [1 => null];
        if (false !== strpos($remaining_uri, '/')) {
            list($remaining_uri, $components['path']) = explode('/', $remaining_uri, 2) + [1 => null];
            $components['path'] = '/'.$components['path'];
        }

        //2 - The $remaining_uri represents the authority part
        //if the authority part is empty parsing is simplified
        if ('' === $remaining_uri) {
            $components['host'] = '';

            return $components;
        }

        //otherwise we split the authority into the user information and the hostname parts
        $parts = explode('@', $remaining_uri, 2);
        $hostname = $parts[1] ?? $parts[0];
        $user_info = isset($parts[1]) ? $parts[0] : null;
        if (null !== $user_info) {
            list($components['user'], $components['pass']) = explode(':', $user_info, 2) + [1 => null];
        }
        list($components['host'], $components['port']) = $this->parseHostname($hostname);

        return $components;
    }

    /**
     * Parse and validate the URI hostname.
     *
     * Returns a two-element list made of the validated host and the
     * validated port (null when no port is present).
     *
     * @throws Exception If the hostname is invalid
     */
    protected function parseHostname(string $hostname): array
    {
        if (false === strpos($hostname, '[')) {
            list($host, $port) = explode(':', $hostname, 2) + [1 => null];

            return [$this->filterHost($host), $this->filterPort($port)];
        }

        //an opening bracket without its closing counterpart can never be valid
        $closing_bracket = strpos($hostname, ']');
        if (false === $closing_bracket) {
            throw Exception::createFromInvalidHostname($hostname);
        }

        //only the port delimiter is allowed right after the IP literal
        $delimiter_offset = $closing_bracket + 1;
        if (isset($hostname[$delimiter_offset]) && ':' !== $hostname[$delimiter_offset]) {
            throw Exception::createFromInvalidHostname($hostname);
        }

        return [
            $this->filterHost(substr($hostname, 0, $delimiter_offset)),
            $this->filterPort(substr($hostname, ++$delimiter_offset)),
        ];
    }

    /**
     * validate the host component.
     *
     * @param string|null $host
     *
     * @throws Exception If the hostname is invalid
     *
     * @return string|null
     */
    protected function filterHost($host)
    {
        if (null === $host || $this->isHost($host)) {
            return $host;
        }

        throw Exception::createFromInvalidHost($host);
    }

    /**
     * Validate a port number.
     *
     * null, false and the empty string are treated as "no port" and yield null.
     * Any other value must be made of ASCII digits only; no upper bound is
     * enforced here, the digits are simply cast to an integer.
     *
     * @param mixed $port the port number
     *
     * @throws Exception If the port is not made of digits only.
     *
     * @return null|int
     */
    protected function filterPort($port)
    {
        static $pattern = '/^[0-9]+$/';

        //false is what substr may return when the hostname holds no port
        if (null === $port || false === $port || '' === $port) {
            return null;
        }

        if (1 !== preg_match($pattern, (string) $port)) {
            throw Exception::createFromInvalidPort($port);
        }

        return (int) $port;
    }

    /**
     * Extract Components from an URI without scheme or authority part.
     *
     * The URI contains a path component and MUST adhere to path requirements
     * of RFC3986. The path can be
     *
     * <code>
     * path = path-abempty    ; begins with "/" or is empty
     *      / path-absolute   ; begins with "/" but not "//"
     *      / path-noscheme   ; begins with a non-colon segment
     *      / path-rootless   ; begins with a segment
     *      / path-empty      ; zero characters
     * </code>
     *
     * ex: path?q#f
     * ex: /path
     * ex: /pa:th#f
     *
     * This method returns an associative array containing all URI components.
     *
     * @see https://tools.ietf.org/html/rfc3986#section-3.3
     * @see https://tools.ietf.org/html/rfc3986#section-4.2
     *
     * @throws Exception If the path component is invalid
     */
    protected function parsePathQueryAndFragment(string $uri): array
    {
        $components = self::URI_COMPONENTS;

        //Parsing is done from the right upmost part to the left
        //1 - detect the fragment part if any
        list($remaining_uri, $components['fragment']) = explode('#', $uri, 2) + [1 => null];

        //2 - detect the query and the path part
        list($components['path'], $components['query']) = explode('?', $remaining_uri, 2) + [1 => null];

        //No scheme is present so we ensure that the path respects RFC3986:
        //its first segment can not contain a colon. Only the path is inspected
        //because a colon is a legal character in the query and the fragment.
        $path = $components['path'];
        if (false !== ($pos = strpos($path, ':')) && false === strpos(substr($path, 0, $pos), '/')) {
            throw Exception::createFromInvalidPath($uri);
        }

        return $components;
    }

    /**
     * Extract components from an URI containing a colon.
     *
     * Depending on the colon ":" position and on the string
     * composition before the presence of the colon, the URI
     * will be considered to have a scheme or not.
     *
     * <ul>
     * <li>In case no valid scheme is found according to RFC3986 the URI will
     * be parsed as an URI without a scheme and an authority</li>
     * <li>In case an authority part is detected the URI specific part is parsed
     * as an URI without scheme</li>
     * </ul>
     *
     * ex: email:johndoe@thephpleague.com?subject=Hello%20World!
     *
     * This method returns an associative array containing all
     * the URI components.
     *
     * @see https://tools.ietf.org/html/rfc3986#section-3.1
     * @see Parser::parsePathQueryAndFragment
     * @see Parser::parseSchemeSpecificPart
     *
     * @throws Exception If the URI scheme component is empty
     */
    protected function fallbackParser(string $uri): array
    {
        //1 - we split the URI on the first detected colon character
        $parts = explode(':', $uri, 2);
        $remaining_uri = $parts[1] ?? $parts[0];
        $scheme = isset($parts[1]) ? $parts[0] : null;

        //1.1 - a scheme can not be empty (ie a URI can not start with a colon)
        if ('' === $scheme) {
            throw Exception::createFromInvalidScheme($uri);
        }

        //2 - depending on the scheme presence and validity we will defer the parsing

        //2.1 - If the scheme part is missing or invalid the URI may be an URI with a path-noscheme
        //      let's defer the parsing to the Parser::parsePathQueryAndFragment method
        //      (the null check keeps the string-typed isScheme call safe when no colon is present)
        if (null === $scheme || !$this->isScheme($scheme)) {
            return $this->parsePathQueryAndFragment($uri);
        }

        $components = self::URI_COMPONENTS;
        $components['scheme'] = $scheme;

        //2.2 - if no scheme specific part is detected parsing is finished
        if ('' === $remaining_uri) {
            return $components;
        }

        //2.3 - if the scheme specific part is a double forward slash
        if ('//' === $remaining_uri) {
            $components['host'] = '';

            return $components;
        }

        //2.4 - if the scheme specific part starts with double forward slash
        //      we defer the remaining parsing to the Parser::parseSchemeSpecificPart method
        if (0 === strpos($remaining_uri, '//')) {
            $components = $this->parseSchemeSpecificPart($remaining_uri);
            $components['scheme'] = $scheme;

            return $components;
        }

        //2.5 - Parsing is done from the right upmost part to the left from the scheme specific part
        //2.5.1 - detect the fragment part if any
        list($remaining_uri, $components['fragment']) = explode('#', $remaining_uri, 2) + [1 => null];

        //2.5.2 - detect the path and query part if any
        list($components['path'], $components['query']) = explode('?', $remaining_uri, 2) + [1 => null];

        return $components;
    }

    /**
     * Convert a registered name label to its IDNA ASCII form.
     *
     * DEPRECATION WARNING! This method will be removed in the next major point release
     *
     * @deprecated 1.4.0 this method is no longer used to validate RFC3987 compliant host component
     * @codeCoverageIgnore
     *
     * If a '%' character is detected the label is rawurldecoded prior to
     * making the conversion.
     *
     * @throws MissingIdnSupport        if IDN support or ICU requirement are not available or met.
     * @throws UnexpectedValueException if the conversion fails without reporting any error.
     *
     * @return string|false
     */
    protected function toAscii(string $label)
    {
        //__METHOD__ already holds the fully qualified class name
        trigger_error(
            __METHOD__.' is deprecated and will be removed in the next major point release',
            E_USER_DEPRECATED
        );

        if (false !== strpos($label, '%')) {
            $label = rawurldecode($label);
        }

        static $idn_support = null;
        $idn_support = $idn_support ?? function_exists('idn_to_ascii') && defined('INTL_IDNA_VARIANT_UTS46');
        if (!$idn_support) {
            throw new MissingIdnSupport(sprintf('the label `%s` could not be processed for IDN. Verify that ext/intl is installed for IDN support and that ICU is at least version 4.6.', $label));
        }

        $ascii_host = idn_to_ascii($label, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46, $arr);
        if (false === $ascii_host && 0 === ($arr['errors'] ?? null)) {
            throw new UnexpectedValueException(sprintf('The Intl extension is misconfigured for %s, please correct this issue before proceeding.', PHP_OS));
        }

        return $ascii_host;
    }

    /**
     * Returns whether the registered name label is valid.
     *
     * DEPRECATION WARNING! This method will be removed in the next major point release
     *
     * @deprecated 1.4.0 this method is no longer used to validate the host component
     * @codeCoverageIgnore
     *
     * The label must be non-empty, at most 63 bytes long and made only of
     * ASCII letters, digits, "-", "_", "~" and sub-delimiters.
     *
     * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
     *
     * @param string $label
     */
    protected function isHostLabel($label): bool
    {
        //__METHOD__ already holds the fully qualified class name
        trigger_error(
            __METHOD__.' is deprecated and will be removed in the next major point release',
            E_USER_DEPRECATED
        );

        return '' != $label
            && 63 >= strlen($label)
            && strlen($label) == strspn($label, self::LABEL_VALID_STARTING_CHARS.'-_~'.self::SUB_DELIMITERS);
    }

    /**
     * Validate an IPv6 host.
     *
     * DEPRECATION WARNING! This method will be removed in the next major point release
     *
     * @deprecated 1.4.0 this method is no longer used to validate the host component
     * @codeCoverageIgnore
     *
     * @see http://tools.ietf.org/html/rfc6874#section-2
     * @see http://tools.ietf.org/html/rfc6874#section-4
     */
    protected function isIpv6Host(string $ipv6): bool
    {
        //__METHOD__ already holds the fully qualified class name
        trigger_error(
            __METHOD__.' is deprecated and will be removed in the next major point release',
            E_USER_DEPRECATED
        );

        if ('[' !== ($ipv6[0] ?? '') || ']' !== substr($ipv6, -1)) {
            return false;
        }

        $ipv6 = substr($ipv6, 1, -1);
        if (false === ($pos = strpos($ipv6, '%'))) {
            return (bool) filter_var($ipv6, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6);
        }

        $scope = rawurldecode(substr($ipv6, $pos));
        if (strlen($scope) !== strcspn($scope, '?#@[]')) {
            return false;
        }

        $ipv6 = substr($ipv6, 0, $pos);
        if (!filter_var($ipv6, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
            return false;
        }

        //Only link-local addresses can have a Zone ID attached to them.
        //The packed address must start with the two bytes 0xfe 0x80.
        return 0 === strpos((string) inet_pton($ipv6), "\xfe\x80");
    }
}

--------------------------------------------------------------------------------
/src/functions.php:
--------------------------------------------------------------------------------
<?php

/**
 * @license https://github.com/thephpleague/uri-parser/blob/master/LICENSE (MIT License)
 * @version 1.4.1
 * @link    https://uri.thephpleague.com/parser/
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

declare(strict_types=1);

namespace League\Uri;

/**
 * Returns whether the URI host component is valid according to RFC3986.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
 * @see Parser::isHost()
 */
function is_host(string $host): bool
{
    static $parser;

    $parser = $parser ?? new Parser();

    return $parser->isHost($host);
}

/**
 * Returns whether the URI port component is valid according to RFC3986.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.2.3
 * @see Parser::isPort()
 *
 * @param mixed $port
 */
function is_port($port): bool
{
    static $parser;

    $parser = $parser ?? new Parser();

    return $parser->isPort($port);
}

/**
 * Returns whether the URI scheme component is valid according to RFC3986.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.1
 * @see Parser::isScheme()
 */
function is_scheme(string $scheme): bool
{
    static $parser;

    $parser = $parser ?? new Parser();

    return $parser->isScheme($scheme);
}

/**
 * Parse an URI string into its components.
 *
 * This method parses a URL and returns an associative array containing any
 * of the various components of the URL that are present.
 *
 * @see https://tools.ietf.org/html/rfc3986
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see Parser::parse()
 *
 * @throws Exception if the URI contains invalid characters
 */
function parse(string $uri): array
{
    static $parser;

    $parser = $parser ?? new Parser();

    return $parser->parse($uri);
}

/**
 * Generate an URI string representation from its parsed representation
 * returned by League\Uri\Parser::parse() or PHP's parse_url.
 *
 * If you supply your own array, you are responsible for providing
 * valid components without their URI delimiters.
 *
 * For security reasons the password (pass) component has been deprecated
 * as per RFC3986 and is never returned in the URI string
 *
 * A component that is missing or set to null is left out together with its
 * delimiter; the user and port components are only used when a host is set.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-5.3
 * @see https://tools.ietf.org/html/rfc3986#section-7.5
 */
function build(array $components): string
{
    $uri = $components['path'] ?? '';
    if (isset($components['query'])) {
        $uri .= '?'.$components['query'];
    }

    if (isset($components['fragment'])) {
        $uri .= '#'.$components['fragment'];
    }

    if (isset($components['host'])) {
        $authority = $components['host'];
        if (isset($components['port'])) {
            $authority .= ':'.$components['port'];
        }

        if (isset($components['user'])) {
            $authority = $components['user'].'@'.$authority;
        }

        $uri = '//'.$authority.$uri;
    }

    if (isset($components['scheme'])) {
        return $components['scheme'].':'.$uri;
    }

    return $uri;
}

--------------------------------------------------------------------------------
/src/functions_include.php:
--------------------------------------------------------------------------------
<?php

/**
 * @license https://github.com/thephpleague/uri-parser/blob/master/LICENSE (MIT License)
 * @version 1.4.1
 * @link    https://uri.thephpleague.com/parser/
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

// Guard against loading the function definitions twice.
if (!function_exists('League\Uri\parse')) {
    require __DIR__.'/functions.php';
}

--------------------------------------------------------------------------------