├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── composer.json ├── phpunit.xml.dist ├── src └── Fleshgrinder │ └── Validator │ └── URL.php └── test └── Fleshgrinder └── Validator └── URLTest.php /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | var 3 | vendor 4 | composer.lock 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2010-2015 Diego Perini & Richard Fussenegger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # URL Validation 2 | I made this repository specifically for Mathias Bynens' little “[in search of the perfect URL validation regex](https://mathiasbynens.be/demo/url-regex)” 3 | challenge and, if I may spoil it from the beginning, mine is not 4 | perfect. But it is a step closer to it. 5 | 6 | The regular expression contains parts from [Diego Perini's regular expression and the comments on his Gist](https://gist.github.com/dperini/729294) 7 | as well as some stuff from the [Symfony URL constraint pattern](https://github.com/symfony/Validator/blob/master/Constraints/UrlValidator.php#L34-L36). 8 | 9 | 10 | Note that the challenge does not cover all possible valid URL constructs. The unit test contains several URLs, both 11 | valid and invalid, which are not part of the challenge. 12 | 13 | Also note that this class is not meant as a real validator; it is more of a starting point for a validator. I had to 14 | release the code under the MIT license because it incorporates a big portion of Diego Perini's regular expression—but I 15 | will ask him if it is possible for me to release it under the [Unlicense](http://unlicense.org) license. If you plan to 16 | use this regular expression in your code, consider removing the username, password, port and IP address support, since 17 | such addresses should not be used for e.g. homepages of users on a profile page or within comments on a blog post. 18 | 19 | On a final note, the class also contains a scheme (aka protocol) validation regular expression. 20 | 21 | The provided unit test has 100% coverage of the little class and the code is PHP 5.3+ compatible. 22 | 23 | ## Features 24 | * Full [Internationalized Domain Name (IDN)](https://en.wikipedia.org/wiki/Internationalized_domain_name) support. 
25 | * Full support for [Punycode](https://en.wikipedia.org/wiki/Punycode). 26 | * Support for IPv4 and IPv6 addresses as hostname. 27 | * Extraction of URL parts (like [`parse_url`](https://php.net/parse-url)): 28 | * Scheme (aka protocol) 29 | * Username 30 | * Password 31 | * Hostname 32 | * Domain + TLD 33 | * IPv4 34 | * IPv6 35 | * Port 36 | * Path 37 | * Query 38 | * Fragment 39 | 40 | ## Install 41 | The class and tests are available via [Composer](https://getcomposer.org/). 42 | 43 | ```shell 44 | composer require fleshgrinder/url-validator dev-master 45 | ``` 46 | 47 | ## TODO 48 | * IPv6 address validation totally relies on [PHP's `filter_var`](https://php.net/filter-var) implementation; find a way 49 | to validate it with the regular expression. 50 | * Port the regular expression to JavaScript for usage in HTML input URL elements and of course JavaScript itself. 51 | * Find more funny URLs for the unit test. 52 | 53 | ## Weblinks 54 | - [Packagist](https://packagist.org/packages/fleshgrinder/url-validator) 55 | 56 | ## License 57 | > The MIT License (MIT) 58 | > 59 | > For more information, please refer to the [LICENSE](LICENSE) file. 60 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fleshgrinder/url-validator", 3 | "description": "A regular expression to validate URLs.", 4 | "keywords": [ 5 | "url", 6 | "uri", 7 | "validation", 8 | "regex", 9 | "regexp", 10 | "regular expression" 11 | ], 12 | "authors": [ 13 | { 14 | "name": "Richard Fussenegger", 15 | "email": "richard@fussenegger.info", 16 | "homepage": "http://richard.fussenegger.info" 17 | } 18 | ], 19 | "license": "MIT", 20 | "version": "0.1.0", 21 | "autoload": { 22 | "psr-4": { 23 | "Fleshgrinder\\Validator\\": [ "src/Fleshgrinder/Validator" ] 24 | } 25 | }, 26 | "require": { 27 | "ext-intl": "*", 28 | "php": ">=5.3" 29 | }, 30 | "autoload-dev": { 31 | "psr-4": { 32 | 
"Fleshgrinder\\Validator\\": [ "test/Fleshgrinder/Validator" ] 33 | } 34 | }, 35 | "require-dev": { 36 | "phpunit/phpunit": "~4.4" 37 | }, 38 | "support": { 39 | "email": "richard@fussenegger.info", 40 | "issues": "https://github.com/Fleshgrinder/php-url-validator/issues", 41 | "source": "https://github.com/Fleshgrinder/php-url-validator.git" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 10 | ./test 11 | ./vendor 12 | 13 | 14 | ./src 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/Fleshgrinder/Validator/URL.php: -------------------------------------------------------------------------------- 1 | 33 | * @copyright 2010-2015 Diego Perini & Richard Fussenegger 34 | * @license MIT 35 | */ 36 | final class URL 37 | { 38 | 39 | 40 | // ----------------------------------------------------------------------------------------------------------------- Constants 41 | 42 | 43 | /** 44 | * The regular expression pattern for scheme validation. 45 | * 46 | * @see https://tools.ietf.org/html/std66 47 | * @see https://tools.ietf.org/html/rfc3986 48 | * @var string 49 | */ 50 | const SCHEME_PATTERN = '/^[a-z][a-z0-9+\.-]*$/Di'; 51 | 52 | /** 53 | * The regular expression pattern for URL validation. 54 | * 55 | * Please note that the IPv6 regular expression is taken from the Symfony project, see link below. 56 | * 57 | * @see https://github.com/symfony/Validator/blob/master/Constraints/UrlValidator.php#L34-L36 58 | * @see https://tools.ietf.org/html/rfc3986 59 | * @var string 60 | */ 61 | const URL_PATTERN = '/^ 62 | (?\'scheme\'%s) 63 | :\/\/ 64 | (?: 65 | (?\'username\'.+) 66 | (?::(?\'password\'.+))? 67 | @)? 68 | (?\'hostname\' 69 | (?!\.) 
70 | (?\'domain\'(?:\.?(?:xn--[[:alnum:]-]+|(?!..--)[[:alnum:]\x{00a1}-\x{ffff}]+-*))+) 71 | (?setAllowedSchemes($allowedSchemes); 229 | } 230 | if ($url !== null) { 231 | $this->validate($url); 232 | } 233 | } 234 | 235 | /** 236 | * Get the string representation of the URL. 237 | * 238 | * @return string 239 | * The string representation of the URL. 240 | */ 241 | public function __toString() 242 | { 243 | return (string) $this->url; 244 | } 245 | 246 | 247 | // ----------------------------------------------------------------------------------------------------------------- Methods 248 | 249 | 250 | /** 251 | * Set the allowed schemes. 252 | * 253 | * By default `http` and `https` are allowed. 254 | * 255 | * @param array|string $allowedSchemes 256 | * The schemes to allow. 257 | * @return $this 258 | * @throws \InvalidArgumentException 259 | * If a scheme is empty or contains illegal characters. 260 | */ 261 | public function setAllowedSchemes($allowedSchemes) 262 | { 263 | if (empty($allowedSchemes)) { 264 | throw new \InvalidArgumentException("Allowed schemes cannot be empty."); 265 | } 266 | 267 | $allowedSchemes = (array) $allowedSchemes; 268 | $c = count($allowedSchemes); 269 | for ($i = 0; $i < $c; ++$i) { 270 | if (empty($allowedSchemes[$i])) { 271 | throw new \InvalidArgumentException("An allowed scheme cannot be empty."); 272 | } elseif (!preg_match(static::SCHEME_PATTERN, $allowedSchemes[$i])) { 273 | throw new \InvalidArgumentException("Allowed scheme [{$allowedSchemes[$i]}] contains illegal characters (see RFC3986)."); 274 | } 275 | } 276 | $this->allowedSchemes = $allowedSchemes; 277 | 278 | return $this; 279 | } 280 | 281 | /** 282 | * Reset all properties to their defaults. 
283 | * 284 | * @return $this 285 | */ 286 | public function reset() 287 | { 288 | $this->domain = null; 289 | $this->fragment = null; 290 | $this->hostname = null; 291 | $this->ipv4 = null; 292 | $this->ipv6 = null; 293 | $this->password = null; 294 | $this->path = null; 295 | $this->port = null; 296 | $this->query = null; 297 | $this->scheme = null; 298 | $this->tld = null; 299 | $this->url = null; 300 | $this->username = null; 301 | 302 | return $this; 303 | } 304 | 305 | /** 306 | * Validate the URL. 307 | * 308 | * The various URL parts are exported to class scope, have a look at the public properties of this class. Note that 309 | * changing any of the properties does not alter the URL itself which this instance represents. 310 | * 311 | * @param string $url 312 | * The URL to set. 313 | * @return $this 314 | * @throws \InvalidArgumentException 315 | * If the URL is empty or invalid. 316 | */ 317 | public function validate($url) 318 | { 319 | $this->reset(); 320 | 321 | if ($url === null || $url === "") { 322 | throw new \InvalidArgumentException("URL cannot be empty."); 323 | } 324 | 325 | // No need to continue with boolean, float, integer, or what not since they will never contain a valid URL. 326 | if (!is_string($url) && !(is_object($url) && method_exists($url, "__toString"))) { 327 | throw new \InvalidArgumentException("URL must be representable as string."); 328 | } 329 | 330 | // NFC form is a requirement for a valid URL. 
331 | if (strlen($url) !== strlen(utf8_decode($url)) && $url !== \Normalizer::normalize($url, \Normalizer::NFC)) { 332 | throw new \InvalidArgumentException("URL must be in Unicode normalization form NFC."); 333 | } 334 | 335 | if (!preg_match(sprintf(static::URL_PATTERN, implode("|", $this->allowedSchemes)), $url, $matches)) { 336 | throw new \InvalidArgumentException("URL [{$url}] is invalid."); 337 | } 338 | 339 | foreach ($matches as $property => $value) { 340 | if (!is_numeric($property) && !empty($value)) { 341 | $this->{$property} = $value; 342 | } 343 | } 344 | 345 | // TODO: Incorporate IPv6 validation into regular expression for JavaScript usage. 346 | if (isset($this->ipv6) && !filter_var(substr($this->ipv6, 1, -1), FILTER_VALIDATE_IP, FILTER_FLAG_IPV6 | FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) { 347 | // @codeCoverageIgnoreStart 348 | $e = new \InvalidArgumentException("IPv6 address {$this->ipv6} is invalid."); 349 | $this->reset(); 350 | throw $e; 351 | // @codeCoverageIgnoreEnd 352 | } 353 | 354 | $this->url = $url; 355 | 356 | return $this; 357 | } 358 | 359 | } 360 | -------------------------------------------------------------------------------- /test/Fleshgrinder/Validator/URLTest.php: -------------------------------------------------------------------------------- 1 | 28 | * @copyright 2010-2015 Diego Perini & Richard Fussenegger 29 | * @license MIT 30 | */ 31 | final class URLTest extends \PHPUnit_Framework_TestCase 32 | { 33 | 34 | 35 | // ----------------------------------------------------------------------------------------------------------------- Constants 36 | 37 | 38 | /** 39 | * This pattern matches all the test cases from the challenge and was the base for the regular expression I came up 40 | * with. A few tweaks here and there and it would be as good as my current regular expression is; of course Unicode 41 | * normalization to NFC is missing (done in PHP) and IPv6 validation (done in PHP) as well. 
42 | * 43 | * @see https://gist.github.com/dperini/729294 44 | * @var string 45 | */ 46 | const DPERINI_PATTERN = '_^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]-*)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]-*)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/\S*)?$_iuS'; 47 | 48 | 49 | // ----------------------------------------------------------------------------------------------------------------- Data Provider 50 | 51 | 52 | /** 53 | * @return array 54 | */ 55 | public function dataProviderInvalidAllowedSchemes() 56 | { 57 | return array( 58 | array(0), 59 | array(null), 60 | array(""), 61 | array(array()), 62 | array(array(0)), 63 | array(array(null)), 64 | array(array("")), 65 | array(array("scheme~", "http")), 66 | array(array("http", "42scheme")), 67 | array(array(42)), 68 | ); 69 | } 70 | 71 | /** 72 | * @return array 73 | */ 74 | public function dataProviderValidURLs() 75 | { 76 | return array( 77 | array("http://foo.com/blah_blah"), 78 | array("http://foo.com/blah_blah/"), 79 | array("http://foo.com/blah_blah_(wikipedia)"), 80 | array("http://foo.com/blah_blah_(wikipedia)_(again)"), 81 | array("http://www.example.com/wpstyle/?p=364"), 82 | array("https://www.example.com/foo/?bar=baz&inga=42&quux"), 83 | array("http://✪df.ws/123"), 84 | array("http://userid:password@example.com:8080"), 85 | array("http://userid:password@example.com:8080/"), 86 | array("http://userid@example.com"), 87 | array("http://userid@example.com/"), 88 | array("http://userid@example.com:8080"), 89 | array("http://userid@example.com:8080/"), 90 | array("http://userid:password@example.com"), 91 | array("http://userid:password@example.com/"), 92 | array("http://142.42.1.1/"), 93 
| array("http://142.42.1.1:8080/"), 94 | array("http://➡.ws/䨹"), 95 | array("http://⌘.ws"), 96 | array("http://⌘.ws/"), 97 | array("http://foo.com/blah_(wikipedia)#cite-1"), 98 | array("http://foo.com/blah_(wikipedia)_blah#cite-1"), 99 | array("http://foo.com/unicode_(✪)_in_parens"), 100 | array("http://foo.com/(something)?after=parens"), 101 | array("http://☺.damowmow.com/"), 102 | array("http://code.google.com/events/#&product=browser"), 103 | array("http://j.mp"), 104 | array("ftp://foo.bar/baz"), 105 | array("http://foo.bar/?q=Test%20URL-encoded%20stuff"), 106 | array("http://مثال.إختبار"), 107 | array("http://xn--mgbh0fb.xn--kgbechtv"), 108 | array("http://例子.测试"), 109 | array("http://xn--fsqu00a.xn--0zwm56d"), 110 | array("http://उदाहरण.परीक्षा"), 111 | array("http://xn--p1b6ci4b4b3a.xn--11b5bs3a6bxe"), 112 | array("http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com"), 113 | array("http://1337.net"), 114 | array("http://a.b-c.de"), 115 | array("http://223.255.255.254"), 116 | array("http://Ω.com"), 117 | array("http://xn--bya.com/"), 118 | array("http://北京大学.中國"), 119 | array("http://xn--1lq90ic7fzpc.xn--fiqz9s"), 120 | array("http://xn--oogle-qmc.com"), 121 | array("http://www.öbb.at/"), 122 | array("http://www.xn--bb-eka.at"), 123 | array("http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html"), 124 | array("http://[1080:0:0:0:8:800:200C:417A]/index.html"), 125 | array("http://[3ffe:2a00:100:7031::1]"), 126 | array("http://[1080::8:800:200C:417A]/foo"), 127 | array("http://[::192.9.5.5]/ipng"), 128 | array("http://[::FFFF:129.144.52.38]:80/index.html"), 129 | array("http://[2010:836B:4179::836B:4179]"), 130 | array("http://a.b--c.de/"), 131 | array("http://www.foo´bar.com"), 132 | array("http://www.example.com?test=123"), 133 | array("http://www.example.com/path?test=123") 134 | ); 135 | } 136 | 137 | /** 138 | * @return array 139 | */ 140 | public function dataProviderInvalidType() 141 | { 142 | return array( 143 | array(null), 144 | array(0), 145 
| array(0.0), 146 | array(""), 147 | array(true), 148 | array(false), 149 | array(array()), 150 | array((object) array()), 151 | ); 152 | } 153 | 154 | /** 155 | * @return array 156 | */ 157 | public function dataProviderInvalidURLs() 158 | { 159 | $c1 = chr(1); 160 | $c2 = chr(2); 161 | $c3 = chr(3); 162 | $c4 = chr(4); 163 | $c5 = chr(5); 164 | $c6 = chr(6); 165 | $c7 = chr(7); 166 | $c8 = chr(8); 167 | $c14 = chr(14); 168 | $c15 = chr(15); 169 | $c16 = chr(16); 170 | $c17 = chr(17); 171 | $c18 = chr(18); 172 | $c19 = chr(19); 173 | $c20 = chr(20); 174 | $c21 = chr(21); 175 | $c22 = chr(22); 176 | $c23 = chr(23); 177 | $c24 = chr(24); 178 | $c127 = chr(127); 179 | 180 | return array( 181 | array("http://"), 182 | array("http://."), 183 | array("http://.."), 184 | array("http://../"), 185 | array("http://?"), 186 | array("http://??"), 187 | array("http://??/"), 188 | array("http://#"), 189 | array("http://##"), 190 | array("http://##/"), 191 | array("http://foo.bar?q=Spaces should be encoded"), 192 | array("//"), 193 | array("//a"), 194 | array("///a"), 195 | array("///"), 196 | array("http:///a"), 197 | array("foo.com"), 198 | array("rdar://1234"), 199 | array("h://test"), 200 | array("http:// shouldfail.com"), 201 | array(":// should fail"), 202 | array("http://foo.bar/foo(bar)baz quux"), 203 | array("ftps://foo.bar/"), 204 | array("http://-error-.invalid/"), 205 | array("http://-a.b.co"), 206 | array("http://a.b-.co"), 207 | array("http://0.0.0.0"), 208 | array("http://10.1.1.0"), 209 | array("http://10.1.1.255"), 210 | array("http://224.1.1.1"), 211 | array("http://1.1.1.1.1"), 212 | array("http://123.123.123"), 213 | array("http://3628126748"), 214 | array("http://.www.foo.bar/"), 215 | array("http://www.foo.bar./"), 216 | array("http://.www.foo.bar./"), 217 | array("http://10.1.1.1"), 218 | array("http://203.0.113.0"), 219 | array("http://198.51.100.0"), 220 | array("http://192.0.2.0"), 221 | array("http://example.com/path\0/to\0/file"), 222 | 
array("http://example.com/path{$c1}/to{$c1}/file"), 223 | array("http://example.com/path{$c2}/to{$c2}/file"), 224 | array("http://example.com/path{$c3}/to{$c3}/file"), 225 | array("http://example.com/path{$c4}/to{$c4}/file"), 226 | array("http://example.com/path{$c5}/to{$c5}/file"), 227 | array("http://example.com/path{$c6}/to{$c6}/file"), 228 | array("http://example.com/path{$c7}/to{$c7}/file"), 229 | array("http://example.com/path{$c8}/to{$c8}/file"), 230 | array("http://example.com/path\t/to\t/file"), 231 | array("http://example.com/path\n/to\n/file"), 232 | array("http://example.com/path\v/to\v/file"), 233 | array("http://example.com/path\f/to\f/file"), 234 | array("http://example.com/path\r/to\r/file"), 235 | array("http://example.com/path{$c14}/to{$c14}/file"), 236 | array("http://example.com/path{$c15}/to{$c15}/file"), 237 | array("http://example.com/path{$c16}/to{$c16}/file"), 238 | array("http://example.com/path{$c17}/to{$c17}/file"), 239 | array("http://example.com/path{$c18}/to{$c18}/file"), 240 | array("http://example.com/path{$c19}/to{$c19}/file"), 241 | array("http://example.com/path{$c20}/to{$c20}/file"), 242 | array("http://example.com/path{$c21}/to{$c21}/file"), 243 | array("http://example.com/path{$c22}/to{$c22}/file"), 244 | array("http://example.com/path{$c23}/to{$c23}/file"), 245 | array("http://example.com/path{$c24}/to{$c24}/file"), 246 | array("http://example.com/path{$c127}/to{$c127}/file"), 247 | array("http://www.foo\0bar.com"), 248 | array("http://www.foo{$c1}bar.com"), 249 | array("http://www.foo{$c2}bar.com"), 250 | array("http://www.foo{$c3}bar.com"), 251 | array("http://www.foo{$c4}bar.com"), 252 | array("http://www.foo{$c5}bar.com"), 253 | array("http://www.foo{$c6}bar.com"), 254 | array("http://www.foo{$c7}bar.com"), 255 | array("http://www.foo{$c8}bar.com"), 256 | array("http://www.foo\tbar.com"), 257 | array("http://www.foo\nbar.com"), 258 | array("http://www.foo\vbar.com"), 259 | array("http://www.foo\fbar.com"), 260 | 
array("http://www.foo\rbar.com"), 261 | array("http://www.foo{$c14}bar.com"), 262 | array("http://www.foo{$c15}bar.com"), 263 | array("http://www.foo{$c16}bar.com"), 264 | array("http://www.foo{$c17}bar.com"), 265 | array("http://www.foo{$c18}bar.com"), 266 | array("http://www.foo{$c19}bar.com"), 267 | array("http://www.foo{$c20}bar.com"), 268 | array("http://www.foo{$c21}bar.com"), 269 | array("http://www.foo{$c22}bar.com"), 270 | array("http://www.foo{$c23}bar.com"), 271 | array("http://www.foo{$c24}bar.com"), 272 | array("http://www.foo{$c127}bar.com"), 273 | array("http://www.foo bar.com"), 274 | array("http://www.foo!bar.com"), 275 | array('http://www.foo"bar.com'), 276 | array("http://www.foo#bar.com"), 277 | array('http://www.foo$bar.com'), 278 | array("http://www.foo%bar.com"), 279 | array("http://www.foo&bar.com"), 280 | array("http://www.foo'bar.com"), 281 | array("http://www.foo(bar.com"), 282 | array("http://www.foo)bar.com"), 283 | array("http://www.foo*bar.com"), 284 | array("http://www.foo+bar.com"), 285 | array("http://www.foo,bar.com"), 286 | array("http://www.foo:bar.com"), 287 | array("http://www.foo;bar.com"), 288 | array("http://www.foobar.com"), 291 | array("http://www.foo?bar.com"), 292 | array("http://www.foo[bar.com"), 293 | array('http://www.foo\bar.com'), 294 | array("http://www.foo]bar.com"), 295 | array("http://www.foo^bar.com"), 296 | array("http://www.foo_bar.com"), 297 | array("http://www.foo`bar.com"), 298 | array("http://www.foo{bar.com"), 299 | array("http://www.foo}bar.com"), 300 | array("http://www.foo|bar.com"), 301 | array("http://www.foobár.com"), // Unicode normalization NFD 302 | // Hyphens on the third and fourth position are not allowed because they would collide with Punycode. 
303 | // http://www.unicode.org/reports/tr46/#Validity_Criteria 304 | array("http://fo--o.com"), 305 | ); 306 | } 307 | 308 | 309 | // ----------------------------------------------------------------------------------------------------------------- Tests 310 | 311 | 312 | /** 313 | * @covers ::__construct 314 | * @covers ::setAllowedSchemes 315 | */ 316 | public function testValidAllowedSchemes() 317 | { 318 | $allowedSchemes = array("http", "https", "ftp", "ftps", "file"); 319 | $url = new URL(null, $allowedSchemes); 320 | $property = new \ReflectionProperty($url, "allowedSchemes"); 321 | $property->setAccessible(true); 322 | $this->assertEquals($allowedSchemes, $property->getValue($url)); 323 | } 324 | 325 | /** 326 | * @covers ::setAllowedSchemes 327 | * @uses ::__construct 328 | * @dataProvider dataProviderInvalidAllowedSchemes 329 | * @expectedException \InvalidArgumentException 330 | * @param mixed $allowedSchemes 331 | */ 332 | public function testSetAllowedSchemes($allowedSchemes) 333 | { 334 | $url = new URL(); 335 | $url->setAllowedSchemes($allowedSchemes); 336 | } 337 | 338 | /** 339 | * @covers ::__construct 340 | * @covers ::reset 341 | * @covers ::validate 342 | */ 343 | public function testPropertyExport() 344 | { 345 | $url = new URL("https://richard:42secret@www2.example.com:8080/path/to/file?key=value;#fragment42"); 346 | $this->assertEquals("https", $url->scheme); 347 | $this->assertEquals("richard", $url->username); 348 | $this->assertEquals("42secret", $url->password); 349 | $this->assertEquals("www2.example.com", $url->hostname); 350 | $this->assertEquals("www2.example", $url->domain); 351 | $this->assertEquals("com" ,$url->tld); 352 | $this->assertEquals(8080, $url->port); 353 | $this->assertEquals("/path/to/file", $url->path); 354 | $this->assertEquals("key=value;", $url->query); 355 | $this->assertEquals("fragment42", $url->fragment); 356 | $this->assertNull($url->ipv4); 357 | $this->assertNull($url->ipv6); 358 | } 359 | 360 | /** 361 | * 
@covers ::__construct 362 | * @covers ::reset 363 | * @covers ::validate 364 | */ 365 | public function testPropertyExportIPv4() 366 | { 367 | $url = new URL("https://richard:42secret@142.42.1.1:8080/path/to/file?key=value;#fragment42"); 368 | $this->assertEquals("https", $url->scheme); 369 | $this->assertEquals("richard", $url->username); 370 | $this->assertEquals("42secret", $url->password); 371 | $this->assertEquals("142.42.1.1", $url->hostname); 372 | $this->assertEquals("142.42.1.1", $url->ipv4); 373 | $this->assertEquals(8080, $url->port); 374 | $this->assertEquals("/path/to/file", $url->path); 375 | $this->assertEquals("key=value;", $url->query); 376 | $this->assertEquals("fragment42", $url->fragment); 377 | $this->assertNull($url->domain); 378 | $this->assertNull($url->ipv6); 379 | $this->assertNull($url->tld); 380 | } 381 | 382 | /** 383 | * @covers ::__construct 384 | * @covers ::reset 385 | * @covers ::validate 386 | */ 387 | public function testPropertyExportIPv6() 388 | { 389 | $url = new URL("https://richard:42secret@[2010:836B:4179::836B:4179]:8080/path/to/file?key=value;#fragment42"); 390 | $this->assertEquals("https", $url->scheme); 391 | $this->assertEquals("richard", $url->username); 392 | $this->assertEquals("42secret", $url->password); 393 | $this->assertEquals("[2010:836B:4179::836B:4179]", $url->hostname); 394 | $this->assertEquals("[2010:836B:4179::836B:4179]", $url->ipv6); 395 | $this->assertEquals(8080, $url->port); 396 | $this->assertEquals("/path/to/file", $url->path); 397 | $this->assertEquals("key=value;", $url->query); 398 | $this->assertEquals("fragment42", $url->fragment); 399 | $this->assertNull($url->domain); 400 | $this->assertNull($url->ipv4); 401 | $this->assertNull($url->tld); 402 | } 403 | 404 | /** 405 | * @covers ::__construct 406 | * @covers ::__toString 407 | * @covers ::reset 408 | * @covers ::validate 409 | * @dataProvider dataProviderValidURLs 410 | * @param string $url 411 | */ 412 | public function 
testValidURLs($url) 413 | { 414 | $instance = new URL($url); 415 | $this->assertEquals($url, $instance->__toString()); 416 | } 417 | 418 | /** 419 | * @covers ::__construct 420 | * @covers ::reset 421 | * @covers ::validate 422 | * @dataProvider dataProviderInvalidType 423 | * @dataProvider dataProviderInvalidURLs 424 | * @expectedException \InvalidArgumentException 425 | * @param string $url 426 | */ 427 | public function testInvalidURLs($url) 428 | { 429 | $instance = new URL(); 430 | $instance->validate($url); 431 | // The following will only execute if no exception was thrown; otherwise we will not know which URL it was. 432 | $this->assertTrue(false, $url); 433 | } 434 | 435 | /** 436 | * Note that this test is not meant to illustrate that his regular expression is not good, it is to illustrate that 437 | * the challenge does not cover all possible URL constructs. 438 | * 439 | * @coversNothing 440 | * @dataProvider dataProviderValidURLs 441 | * @param string $url 442 | */ 443 | public function testValidURLsWithDiegoPerinisRegularExpression($url) 444 | { 445 | $this->assertTrue((boolean) preg_match(self::DPERINI_PATTERN, $url), $url); 446 | } 447 | 448 | /** 449 | * Note that this test is not meant to illustrate that his regular expression is not good, it is to illustrate that 450 | * the challenge does not cover all possible URL constructs. 451 | * 452 | * @coversNothing 453 | * @dataProvider dataProviderInvalidURLs 454 | * @param string $url 455 | */ 456 | public function testInvalidURLsWithDiegoPerinisRegularExpression($url) 457 | { 458 | $this->assertFalse((boolean) preg_match(self::DPERINI_PATTERN, $url), $url); 459 | } 460 | 461 | } 462 | --------------------------------------------------------------------------------