├── .gitignore
├── .gitattributes
├── phpunit.xml.dist
├── LICENSE
├── composer.json
├── README.md
├── src
└── Fleshgrinder
│ └── Validator
│ └── URL.php
└── test
└── Fleshgrinder
└── Validator
└── URLTest.php
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | var
3 | vendor
4 | composer.lock
5 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
10 | ./test
11 | ./vendor
12 |
13 |
14 | ./src
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2010-2015 Diego Perini & Richard Fussenegger
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "fleshgrinder/url-validator",
3 | "description": "A regular expression to validate URLs.",
4 | "keywords": [
5 | "url",
6 | "uri",
7 | "validation",
8 | "regex",
9 | "regexp",
10 | "regular expression"
11 | ],
12 | "authors": [
13 | {
14 | "name": "Richard Fussenegger",
15 | "email": "richard@fussenegger.info",
16 | "homepage": "http://richard.fussenegger.info"
17 | }
18 | ],
19 | "license": "MIT",
20 | "version": "0.1.0",
21 | "autoload": {
22 | "psr-4": {
23 | "Fleshgrinder\\Validator\\": [ "src/Fleshgrinder/Validator" ]
24 | }
25 | },
26 | "require": {
27 | "ext-intl": "*",
28 | "php": ">=5.3"
29 | },
30 | "autoload-dev": {
31 | "psr-4": {
32 | "Fleshgrinder\\Validator\\": [ "test/Fleshgrinder/Validator" ]
33 | }
34 | },
35 | "require-dev": {
36 | "phpunit/phpunit": "~4.4"
37 | },
38 | "support": {
39 | "email": "richard@fussenegger.info",
40 | "issues": "https://github.com/Fleshgrinder/php-url-validator/issues",
41 | "source": "https://github.com/Fleshgrinder/php-url-validator.git"
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # URL Validation
2 | I made this repository specifically for Mathias Bynens' little
3 | “[in search of the perfect URL validation regex](https://mathiasbynens.be/demo/url-regex)” challenge and, if I may
4 | spoil it from the beginning, mine is not perfect. But it is a step closer to it.
5 |
6 | The regular expression contains parts from
7 | [Diego Perini's regular expression and the comments on his Gist](https://gist.github.com/dperini/729294) as well as
8 | some parts of the [Symfony URL constraint pattern](https://github.com/symfony/Validator/blob/master/Constraints/UrlValidator.php#L34-L36).
9 |
10 | Note that the challenge does not cover all possible valid URL constructs. The unit test contains several additional
11 | valid and invalid URLs that are not part of the challenge.
12 |
13 | Also note that this class is not meant as a real validator, it is more a starting point for a validator. I had to
14 | release the code under the MIT license because it incorporates a big portion of Diego Perini's regular expression—but I
15 | will ask him if it is possible for me to release it under the [Unlicense](http://unlicense.org) license. If you plan to
16 | use this regular expression in your code, consider removing the username, password, port and IP address support, since
17 | such addresses should not be used for e.g. homepages of users on a profile page or within comments on a blog post.
18 |
19 | On a last note, the class also contains a scheme (aka protocol) validation regular expression.
20 |
21 | The provided unit test covers 100% of the little class and the code is PHP 5.3+ compatible.
22 |
23 | ## Features
24 | * Full [Internationalized Domain Name (IDN)](https://en.wikipedia.org/wiki/Internationalized_domain_name) support.
25 | * Full support for [Punycode](https://en.wikipedia.org/wiki/Punycode).
26 | * Support for IPv4 and IPv6 addresses as hostname.
27 | * Extraction of URL parts (like [`parse_url`](https://php.net/parse-url)):
28 | * Scheme (aka protocol)
29 | * Username
30 | * Password
31 | * Hostname
32 | * Domain + TLD
33 | * IPv4
34 | * IPv6
35 | * Port
36 | * Path
37 | * Query
38 | * Fragment
39 |
40 | ## Install
41 | The class and tests are available via [composer](https://getcomposer.org/).
42 |
43 | ```shell
44 | composer require fleshgrinder/url-validator dev-master
45 | ```
46 |
47 | ## TODO
48 | * IPv6 address validation totally relies on [PHP's `filter_var`](https://php.net/filter-var) implementation, find a way
49 | to validate it with the regular expression.
50 | * Port the regular expression to JavaScript for usage in HTML input URL elements and of course JavaScript itself.
51 | * Find more funny URLs for the unit test.
52 |
53 | ## Weblinks
54 | - [Packagist](https://packagist.org/packages/fleshgrinder/url-validator)
55 |
56 | ## License
57 | > The MIT License (MIT)
58 | >
59 | > For more information, please refer to <http://opensource.org/licenses/MIT>
60 |
--------------------------------------------------------------------------------
/src/Fleshgrinder/Validator/URL.php:
--------------------------------------------------------------------------------
1 |
33 | * @copyright 2010-2015 Diego Perini & Richard Fussenegger
34 | * @license MIT
35 | */
36 | final class URL
37 | {
38 |
39 |
40 | // ----------------------------------------------------------------------------------------------------------------- Constants
41 |
42 |
/**
 * Regular expression a single URI scheme has to match.
 *
 * A scheme starts with an ASCII letter which may be followed by any number of ASCII letters, digits, `+`, `.` or `-`.
 * The `i` modifier makes the match case-insensitive and the `D` modifier ensures that `$` only matches at the very
 * end of the subject (a trailing line feed is therefore rejected).
 *
 * @see https://tools.ietf.org/html/std66
 * @see https://tools.ietf.org/html/rfc3986
 * @var string
 */
const SCHEME_PATTERN = '/^[a-z][a-z0-9+\.-]*$/Di';
51 |
52 | /**
53 | * The regular expression pattern for URL validation.
54 | *
55 | * Please note that the IPv6 regular expression is taken from the Symfony project, see link below.
56 | *
57 | * @see https://github.com/symfony/Validator/blob/master/Constraints/UrlValidator.php#L34-L36
58 | * @see https://tools.ietf.org/html/rfc3986
59 | * @var string
60 | */
61 | const URL_PATTERN = '/^
62 | (?\'scheme\'%s)
63 | :\/\/
64 | (?:
65 | (?\'username\'.+)
66 | (?::(?\'password\'.+))?
67 | @)?
68 | (?\'hostname\'
69 | (?!\.)
70 | (?\'domain\'(?:\.?(?:xn--[[:alnum:]-]+|(?!..--)[[:alnum:]\x{00a1}-\x{ffff}]+-*))+)
71 | (?setAllowedSchemes($allowedSchemes);
229 | }
230 | if ($url !== null) {
231 | $this->validate($url);
232 | }
233 | }
234 |
/**
 * Convert the instance to the URL it represents.
 *
 * @return string
 *   The value of the `url` property cast to string, which is the empty string while `url` is `null`.
 */
public function __toString()
{
    $representation = (string) $this->url;

    return $representation;
}
245 |
246 |
247 | // ----------------------------------------------------------------------------------------------------------------- Methods
248 |
249 |
/**
 * Set the allowed schemes.
 *
 * By default `http` and `https` are allowed.
 *
 * The given value is cast to an array and re-indexed, so a single scheme string, a list, as well as associative
 * or sparse arrays are all accepted. Every entry must be a non-empty string matching {@see URL::SCHEME_PATTERN}.
 *
 * @param array|string $allowedSchemes
 *   The schemes to allow.
 * @return $this
 * @throws \InvalidArgumentException
 *   If no scheme was given, or if a scheme is empty, not a string, or contains illegal characters.
 */
public function setAllowedSchemes($allowedSchemes)
{
    if (empty($allowedSchemes)) {
        throw new \InvalidArgumentException("Allowed schemes cannot be empty.");
    }

    // Re-index the array: the keys carry no meaning and must not influence validation (an index based loop would
    // hit undefined offsets for associative or sparse arrays and wrongly report their entries as empty).
    $schemes = array_values((array) $allowedSchemes);

    foreach ($schemes as $scheme) {
        if (empty($scheme)) {
            throw new \InvalidArgumentException("An allowed scheme cannot be empty.");
        }

        // Only strings can be valid schemes; checking the type first keeps arrays and objects away from preg_match.
        if (!is_string($scheme) || !preg_match(self::SCHEME_PATTERN, $scheme)) {
            $printable = is_scalar($scheme) ? $scheme : gettype($scheme);
            throw new \InvalidArgumentException("Allowed scheme [{$printable}] contains illegal characters (see RFC3986).");
        }
    }

    $this->allowedSchemes = $schemes;

    return $this;
}
280 |
/**
 * Clear every exported URL part as well as the URL itself.
 *
 * All properties listed below are set to `null`; the allowed schemes are not touched by this method.
 *
 * @return $this
 */
public function reset()
{
    $properties = array(
        "domain",
        "fragment",
        "hostname",
        "ipv4",
        "ipv6",
        "password",
        "path",
        "port",
        "query",
        "scheme",
        "tld",
        "url",
        "username",
    );

    foreach ($properties as $property) {
        $this->{$property} = null;
    }

    return $this;
}
304 |
/**
 * Validate the URL.
 *
 * The instance is reset first. The URL is then matched against {@see URL::URL_PATTERN} (with the allowed schemes
 * inserted) and every non-empty named capture group is exported to the property of the same name, have a look at the
 * public properties of this class. Note that changing any of the properties does not alter the URL itself which
 * this instance represents.
 *
 * @param string $url
 *   The URL to set, either a string or an object implementing `__toString`.
 * @return $this
 * @throws \InvalidArgumentException
 *   If the URL is empty, not representable as string, not in Unicode normalization form NFC, or invalid.
 */
public function validate($url)
{
    $this->reset();

    if ($url === null || $url === "") {
        throw new \InvalidArgumentException("URL cannot be empty.");
    }

    // No need to continue with boolean, float, integer, or what not since they will never contain a valid URL.
    if (!is_string($url) && !(is_object($url) && method_exists($url, "__toString"))) {
        throw new \InvalidArgumentException("URL must be representable as string.");
    }

    // Work with the actual string from here on, otherwise a stringable object would end up in the url property.
    $url = (string) $url;
    if ($url === "") {
        throw new \InvalidArgumentException("URL cannot be empty.");
    }

    // NFC form is a requirement for a valid URL. Pure ASCII strings are always in NFC, hence, the normalizer is
    // only consulted if the URL contains at least one byte outside of the ASCII range (byte-wise match, no `u`).
    if (preg_match('/[^\x00-\x7F]/', $url) && $url !== \Normalizer::normalize($url, \Normalizer::NFC)) {
        throw new \InvalidArgumentException("URL must be in Unicode normalization form NFC.");
    }

    // Valid schemes may contain `+`, `.`, and `-` which have a special meaning in regular expressions and must be
    // quoted before they are inserted into the pattern (e.g. `svn+ssh` would otherwise match `svnnssh`).
    $quotedSchemes = array();
    foreach ($this->allowedSchemes as $scheme) {
        $quotedSchemes[] = preg_quote($scheme, "/");
    }

    if (!preg_match(sprintf(self::URL_PATTERN, implode("|", $quotedSchemes)), $url, $matches)) {
        throw new \InvalidArgumentException("URL [{$url}] is invalid.");
    }

    // Export the named groups only (numeric keys are the positional duplicates). Groups that did not participate
    // in the match are empty strings and skipped; a strict comparison is used so a literal "0" is still exported.
    foreach ($matches as $property => $value) {
        if (is_string($property) && $value !== "") {
            $this->{$property} = $value;
        }
    }

    // TODO: Incorporate IPv6 validation into regular expression for JavaScript usage.
    // The captured IPv6 address includes the enclosing brackets, strip them for filter_var.
    if (isset($this->ipv6) && !filter_var(substr($this->ipv6, 1, -1), FILTER_VALIDATE_IP, FILTER_FLAG_IPV6 | FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
        // @codeCoverageIgnoreStart
        // Build the exception before resetting, the message needs the captured address.
        $e = new \InvalidArgumentException("IPv6 address {$this->ipv6} is invalid.");
        $this->reset();
        throw $e;
        // @codeCoverageIgnoreEnd
    }

    $this->url = $url;

    return $this;
}
358 |
359 | }
360 |
--------------------------------------------------------------------------------
/test/Fleshgrinder/Validator/URLTest.php:
--------------------------------------------------------------------------------
1 |
28 | * @copyright 2010-2015 Diego Perini & Richard Fussenegger
29 | * @license MIT
30 | */
31 | final class URLTest extends \PHPUnit_Framework_TestCase
32 | {
33 |
34 |
35 | // ----------------------------------------------------------------------------------------------------------------- Constants
36 |
37 |
/**
 * Diego Perini's URL validation pattern, which matches all test cases of the challenge and served as the base for
 * the regular expression of the class under test. It neither covers Unicode normalization to NFC nor IPv6
 * validation, both of which the class under test handles in PHP.
 *
 * @see https://gist.github.com/dperini/729294
 * @var string
 */
const DPERINI_PATTERN = '_^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]-*)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]-*)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/\S*)?$_iuS';
47 |
48 |
49 | // ----------------------------------------------------------------------------------------------------------------- Data Provider
50 |
51 |
/**
 * Provides arguments which `setAllowedSchemes` is expected to reject.
 *
 * @return array
 *   One data set per invalid argument, each containing the argument as its only element.
 */
public function dataProviderInvalidAllowedSchemes()
{
    $invalidArguments = array(
        0,
        null,
        "",
        array(),
        array(0),
        array(null),
        array(""),
        array("scheme~", "http"),
        array("http", "42scheme"),
        array(42),
    );

    $dataSets = array();
    foreach ($invalidArguments as $invalidArgument) {
        $dataSets[] = array($invalidArgument);
    }

    return $dataSets;
}
70 |
/**
 * Provides URLs which are expected to be valid.
 *
 * @return array
 *   One data set per URL, each containing the URL as its only element.
 */
public function dataProviderValidURLs()
{
    $validURLs = array(
        "http://foo.com/blah_blah",
        "http://foo.com/blah_blah/",
        "http://foo.com/blah_blah_(wikipedia)",
        "http://foo.com/blah_blah_(wikipedia)_(again)",
        "http://www.example.com/wpstyle/?p=364",
        "https://www.example.com/foo/?bar=baz&inga=42&quux",
        "http://✪df.ws/123",
        "http://userid:password@example.com:8080",
        "http://userid:password@example.com:8080/",
        "http://userid@example.com",
        "http://userid@example.com/",
        "http://userid@example.com:8080",
        "http://userid@example.com:8080/",
        "http://userid:password@example.com",
        "http://userid:password@example.com/",
        "http://142.42.1.1/",
        "http://142.42.1.1:8080/",
        "http://➡.ws/䨹",
        "http://⌘.ws",
        "http://⌘.ws/",
        "http://foo.com/blah_(wikipedia)#cite-1",
        "http://foo.com/blah_(wikipedia)_blah#cite-1",
        "http://foo.com/unicode_(✪)_in_parens",
        "http://foo.com/(something)?after=parens",
        "http://☺.damowmow.com/",
        "http://code.google.com/events/#&product=browser",
        "http://j.mp",
        "ftp://foo.bar/baz",
        "http://foo.bar/?q=Test%20URL-encoded%20stuff",
        "http://مثال.إختبار",
        "http://xn--mgbh0fb.xn--kgbechtv",
        "http://例子.测试",
        "http://xn--fsqu00a.xn--0zwm56d",
        "http://उदाहरण.परीक्षा",
        "http://xn--p1b6ci4b4b3a.xn--11b5bs3a6bxe",
        "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
        "http://1337.net",
        "http://a.b-c.de",
        "http://223.255.255.254",
        "http://Ω.com",
        "http://xn--bya.com/",
        "http://北京大学.中國",
        "http://xn--1lq90ic7fzpc.xn--fiqz9s",
        "http://xn--oogle-qmc.com",
        "http://www.öbb.at/",
        "http://www.xn--bb-eka.at",
        "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html",
        "http://[1080:0:0:0:8:800:200C:417A]/index.html",
        "http://[3ffe:2a00:100:7031::1]",
        "http://[1080::8:800:200C:417A]/foo",
        "http://[::192.9.5.5]/ipng",
        "http://[::FFFF:129.144.52.38]:80/index.html",
        "http://[2010:836B:4179::836B:4179]",
        "http://a.b--c.de/",
        "http://www.foo´bar.com",
        "http://www.example.com?test=123",
        "http://www.example.com/path?test=123"
    );

    $dataSets = array();
    foreach ($validURLs as $validURL) {
        $dataSets[] = array($validURL);
    }

    return $dataSets;
}
136 |
/**
 * Provides values which are not usable as URL because of their type or because they are empty.
 *
 * @return array
 *   One data set per value, each containing the value as its only element.
 */
public function dataProviderInvalidType()
{
    $invalidValues = array(
        null,
        0,
        0.0,
        "",
        true,
        false,
        array(),
        (object) array(),
    );

    $dataSets = array();
    foreach ($invalidValues as $invalidValue) {
        $dataSets[] = array($invalidValue);
    }

    return $dataSets;
}
153 |
/**
 * Provides URLs which are expected to be invalid.
 *
 * @return array
 *   One data set per URL, each containing the URL as its only element.
 */
public function dataProviderInvalidURLs()
{
    $invalidURLs = array(
        "http://",
        "http://.",
        "http://..",
        "http://../",
        "http://?",
        "http://??",
        "http://??/",
        "http://#",
        "http://##",
        "http://##/",
        "http://foo.bar?q=Spaces should be encoded",
        "//",
        "//a",
        "///a",
        "///",
        "http:///a",
        "foo.com",
        "rdar://1234",
        "h://test",
        "http:// shouldfail.com",
        ":// should fail",
        "http://foo.bar/foo(bar)baz quux",
        "ftps://foo.bar/",
        "http://-error-.invalid/",
        "http://-a.b.co",
        "http://a.b-.co",
        "http://0.0.0.0",
        "http://10.1.1.0",
        "http://10.1.1.255",
        "http://224.1.1.1",
        "http://1.1.1.1.1",
        "http://123.123.123",
        "http://3628126748",
        "http://.www.foo.bar/",
        "http://www.foo.bar./",
        "http://.www.foo.bar./",
        "http://10.1.1.1",
        "http://203.0.113.0",
        "http://198.51.100.0",
        "http://192.0.2.0",
    );

    // The control characters with the codes 0 through 24 and 127, first within the path and then within the hostname.
    $controlCharacters = array_map("chr", array_merge(range(0, 24), array(127)));
    foreach ($controlCharacters as $character) {
        $invalidURLs[] = "http://example.com/path{$character}/to{$character}/file";
    }
    foreach ($controlCharacters as $character) {
        $invalidURLs[] = "http://www.foo{$character}bar.com";
    }

    // Printable ASCII characters which must not be accepted within a hostname.
    $invalidURLs = array_merge($invalidURLs, array(
        "http://www.foo bar.com",
        "http://www.foo!bar.com",
        'http://www.foo"bar.com',
        "http://www.foo#bar.com",
        'http://www.foo$bar.com',
        "http://www.foo%bar.com",
        "http://www.foo&bar.com",
        "http://www.foo'bar.com",
        "http://www.foo(bar.com",
        "http://www.foo)bar.com",
        "http://www.foo*bar.com",
        "http://www.foo+bar.com",
        "http://www.foo,bar.com",
        "http://www.foo:bar.com",
        "http://www.foo;bar.com",
        "http://www.foo<bar.com",
        "http://www.foo=bar.com",
        "http://www.foo>bar.com",
        "http://www.foo?bar.com",
        "http://www.foo[bar.com",
        'http://www.foo\bar.com',
        "http://www.foo]bar.com",
        "http://www.foo^bar.com",
        "http://www.foo_bar.com",
        "http://www.foo`bar.com",
        "http://www.foo{bar.com",
        "http://www.foo}bar.com",
        "http://www.foo|bar.com",
        // Unicode normalization form NFD: "a" followed by U+0301 (combining acute accent). The bytes are spelled out
        // explicitly so that the test does not depend on the normalization form this file is saved in.
        "http://www.fooba\xCC\x81r.com",
        // Hyphens on the third and fourth position are not allowed because they would collide with Punycode.
        // http://www.unicode.org/reports/tr46/#Validity_Criteria
        "http://fo--o.com",
    ));

    $dataSets = array();
    foreach ($invalidURLs as $invalidURL) {
        $dataSets[] = array($invalidURL);
    }

    return $dataSets;
}
307 |
308 |
309 | // ----------------------------------------------------------------------------------------------------------------- Tests
310 |
311 |
/**
 * Schemes passed to the constructor must end up unchanged in the `allowedSchemes` property.
 *
 * @covers ::__construct
 * @covers ::setAllowedSchemes
 */
public function testValidAllowedSchemes()
{
    $expected = array("http", "https", "ftp", "ftps", "file");
    $instance = new URL(null, $expected);

    // The property is not publicly readable, use reflection to get its value.
    $reflector = new \ReflectionProperty($instance, "allowedSchemes");
    $reflector->setAccessible(true);
    $actual = $reflector->getValue($instance);

    $this->assertEquals($expected, $actual);
}
324 |
/**
 * Every invalid argument must be rejected with an exception.
 *
 * @covers ::setAllowedSchemes
 * @uses ::__construct
 * @dataProvider dataProviderInvalidAllowedSchemes
 * @expectedException \InvalidArgumentException
 * @param mixed $allowedSchemes
 *   The invalid argument to pass to `setAllowedSchemes`.
 */
public function testSetAllowedSchemes($allowedSchemes)
{
    $instance = new URL();

    $instance->setAllowedSchemes($allowedSchemes);
}
337 |
/**
 * All parts of a URL with a domain as hostname must be exported; the IP address properties must stay `null`.
 *
 * @covers ::__construct
 * @covers ::reset
 * @covers ::validate
 */
public function testPropertyExport()
{
    $url = new URL("https://richard:42secret@www2.example.com:8080/path/to/file?key=value;#fragment42");

    $expectations = array(
        "scheme"   => "https",
        "username" => "richard",
        "password" => "42secret",
        "hostname" => "www2.example.com",
        "domain"   => "www2.example",
        "tld"      => "com",
        "port"     => 8080,
        "path"     => "/path/to/file",
        "query"    => "key=value;",
        "fragment" => "fragment42",
    );
    foreach ($expectations as $property => $expected) {
        $this->assertEquals($expected, $url->{$property});
    }

    $this->assertNull($url->ipv4);
    $this->assertNull($url->ipv6);
}
359 |
/**
 * All parts of a URL with an IPv4 address as hostname must be exported; the domain, TLD, and IPv6 properties must
 * stay `null`.
 *
 * @covers ::__construct
 * @covers ::reset
 * @covers ::validate
 */
public function testPropertyExportIPv4()
{
    $url = new URL("https://richard:42secret@142.42.1.1:8080/path/to/file?key=value;#fragment42");

    $expectations = array(
        "scheme"   => "https",
        "username" => "richard",
        "password" => "42secret",
        "hostname" => "142.42.1.1",
        "ipv4"     => "142.42.1.1",
        "port"     => 8080,
        "path"     => "/path/to/file",
        "query"    => "key=value;",
        "fragment" => "fragment42",
    );
    foreach ($expectations as $property => $expected) {
        $this->assertEquals($expected, $url->{$property});
    }

    $this->assertNull($url->domain);
    $this->assertNull($url->ipv6);
    $this->assertNull($url->tld);
}
381 |
/**
 * All parts of a URL with an IPv6 address as hostname must be exported (the address including its brackets); the
 * domain, TLD, and IPv4 properties must stay `null`.
 *
 * @covers ::__construct
 * @covers ::reset
 * @covers ::validate
 */
public function testPropertyExportIPv6()
{
    $url = new URL("https://richard:42secret@[2010:836B:4179::836B:4179]:8080/path/to/file?key=value;#fragment42");

    $expectations = array(
        "scheme"   => "https",
        "username" => "richard",
        "password" => "42secret",
        "hostname" => "[2010:836B:4179::836B:4179]",
        "ipv6"     => "[2010:836B:4179::836B:4179]",
        "port"     => 8080,
        "path"     => "/path/to/file",
        "query"    => "key=value;",
        "fragment" => "fragment42",
    );
    foreach ($expectations as $property => $expected) {
        $this->assertEquals($expected, $url->{$property});
    }

    $this->assertNull($url->domain);
    $this->assertNull($url->ipv4);
    $this->assertNull($url->tld);
}
403 |
/**
 * A valid URL must be accepted by the constructor and returned unchanged by the string conversion.
 *
 * @covers ::__construct
 * @covers ::__toString
 * @covers ::reset
 * @covers ::validate
 * @dataProvider dataProviderValidURLs
 * @param string $url
 *   The valid URL to test.
 */
public function testValidURLs($url)
{
    $instance = new URL($url);
    $stringRepresentation = $instance->__toString();

    $this->assertEquals($url, $stringRepresentation);
}
417 |
/**
 * Invalid values and URLs must be rejected with an exception.
 *
 * @covers ::__construct
 * @covers ::reset
 * @covers ::validate
 * @dataProvider dataProviderInvalidType
 * @dataProvider dataProviderInvalidURLs
 * @expectedException \InvalidArgumentException
 * @param string $url
 *   The invalid value or URL to test.
 */
public function testInvalidURLs($url)
{
    $instance = new URL();
    $instance->validate($url);

    // The following will only execute if no exception was thrown; otherwise we will not know which URL it was. The
    // value is exported because the data sets also contain arrays, objects, booleans, and null, none of which is
    // usable as a failure message directly.
    $this->fail("No exception was thrown for: " . var_export($url, true));
}
434 |
/**
 * Match the valid URLs against Diego Perini's pattern.
 *
 * Note that this test is not meant to illustrate that his regular expression is not good, it is to illustrate that
 * the challenge does not cover all possible URL constructs.
 *
 * @coversNothing
 * @dataProvider dataProviderValidURLs
 * @param string $url
 *   The valid URL to test.
 */
public function testValidURLsWithDiegoPerinisRegularExpression($url)
{
    // preg_match returns 1 for a match, 0 for no match, and false on error.
    $isMatch = preg_match(self::DPERINI_PATTERN, $url) === 1;

    $this->assertTrue($isMatch, $url);
}
447 |
/**
 * Match the invalid URLs against Diego Perini's pattern.
 *
 * Note that this test is not meant to illustrate that his regular expression is not good, it is to illustrate that
 * the challenge does not cover all possible URL constructs.
 *
 * @coversNothing
 * @dataProvider dataProviderInvalidURLs
 * @param string $url
 *   The invalid URL to test.
 */
public function testInvalidURLsWithDiegoPerinisRegularExpression($url)
{
    // preg_match returns 1 for a match, 0 for no match, and false on error.
    $isMatch = preg_match(self::DPERINI_PATTERN, $url) === 1;

    $this->assertFalse($isMatch, $url);
}
460 |
461 | }
462 |
--------------------------------------------------------------------------------