├── .gitignore ├── LICENSE ├── README.md ├── composer.json ├── src └── urljoin.php └── tests ├── cases.json ├── tests.php └── tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | vendor/ 3 | .idea/ 4 | composer.lock 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 j. shagam 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # php-urljoin 2 | A PHP library function for joining a base URL and a potentially-relative target URL into an absolute URL. 3 | 4 | Why isn't this in the PHP standard library? WHO KNOWS. 5 | 6 | Installation: 7 | 8 | * Direct install: just pull `src/urljoin.php` into your project 9 | * [Composer](https://packagist.org/packages/busybee/urljoin): `composer require busybee/urljoin` 10 | 11 | Usage: 12 | 13 | urljoin($base_url, $other_url); 14 | 15 | See `tests/tests.php` for the test suite and `tests/cases.json` for expected 16 | inputs and outputs. (Doing this correctly is *way* more complicated than you'd 17 | think!) This implementation strives for accuracy and, in cases of ambiguity 18 | (such as `../` stack underflow and empty path components), defers to the 19 | behavior of Python 2's standard library (as implemented by `urlparse.urljoin`). 
20 | 21 | Other things that you might know this as: 22 | 23 | * Relative path concatenation 24 | * A function for converting a relative path to an absolute URL given a base URL 25 | * `rel2abs` and `relativeToAbsolute` (common names for this sort of function) 26 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "busybee/urljoin", 3 | "description": "Library to correctly join relative and absolute URLs", 4 | "type": "library", 5 | "version": "1.0.0", 6 | "keywords": ["rel2abs", "relative2absolute", "urllib", "urljoin", "url-concatenation", "http"], 7 | "license": "MIT", 8 | "authors": [ 9 | { 10 | "name": "j 'fluffy' shagam", 11 | "email": "fluffy@beesbuzz.biz", 12 | "homepage": "https://beesbuzz.biz/" 13 | } 14 | ], 15 | "require": { 16 | "php": ">=5.4.0" 17 | }, 18 | "autoload": { 19 | "files": [ 20 | "src/urljoin.php" 21 | ] 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/urljoin.php: -------------------------------------------------------------------------------- 1 | $rel); 36 | } 37 | 38 | if (array_key_exists('path', $pbase) && $pbase['path'] === '/') { 39 | unset($pbase['path']); 40 | } 41 | 42 | if (isset($prel['scheme'])) { 43 | if ($prel['scheme'] != $pbase['scheme'] || in_array($prel['scheme'], $uses_relative) == false) { 44 | return $rel; 45 | } 46 | } 47 | 48 | $merged = array_merge($pbase, $prel); 49 | 50 | // Handle relative paths: 51 | // 'path/to/file.ext' 52 | // './path/to/file.ext' 53 | if (array_key_exists('path', $prel) && substr($prel['path'], 0, 1) != '/') { 54 | 55 | // Normalize: './path/to/file.ext' => 'path/to/file.ext' 56 | if (substr($prel['path'], 0, 2) === './') { 57 | $prel['path'] = substr($prel['path'], 2); 58 | } 59 | 60 | if (array_key_exists('path', $pbase)) { 61 | $dir = preg_replace('@/[^/]*$@', '', $pbase['path']); 62 | 
$merged['path'] = $dir . '/' . $prel['path']; 63 | } else { 64 | $merged['path'] = '/' . $prel['path']; 65 | } 66 | 67 | } 68 | 69 | if(array_key_exists('path', $merged)) { 70 | // Get the path components, and remove the initial empty one 71 | $pathParts = explode('/', $merged['path']); 72 | array_shift($pathParts); 73 | 74 | $path = []; 75 | $prevPart = ''; 76 | foreach ($pathParts as $part) { 77 | if ($part == '..' && count($path) > 0) { 78 | // Cancel out the parent directory (if there's a parent to cancel) 79 | $parent = array_pop($path); 80 | // But if it was also a parent directory, leave it in 81 | if ($parent == '..') { 82 | array_push($path, $parent); 83 | array_push($path, $part); 84 | } 85 | } else if ($prevPart != '' || ($part != '.' && $part != '')) { 86 | // Don't include empty or current-directory components 87 | if ($part == '.') { 88 | $part = ''; 89 | } 90 | array_push($path, $part); 91 | } 92 | $prevPart = $part; 93 | } 94 | $merged['path'] = '/' . implode('/', $path); 95 | } 96 | 97 | $ret = ''; 98 | if (isset($merged['scheme'])) { 99 | $ret .= $merged['scheme'] . ':'; 100 | } 101 | 102 | if (isset($merged['scheme']) || isset($merged['host'])) { 103 | $ret .= '//'; 104 | } 105 | 106 | if (isset($prel['host'])) { 107 | $hostSource = $prel; 108 | } else { 109 | $hostSource = $pbase; 110 | } 111 | 112 | // username, password, and port are associated with the hostname, not merged 113 | if (isset($hostSource['host'])) { 114 | if (isset($hostSource['user'])) { 115 | $ret .= $hostSource['user']; 116 | if (isset($hostSource['pass'])) { 117 | $ret .= ':' . $hostSource['pass']; 118 | } 119 | $ret .= '@'; 120 | } 121 | $ret .= $hostSource['host']; 122 | if (isset($hostSource['port'])) { 123 | $ret .= ':' . $hostSource['port']; 124 | } 125 | } 126 | 127 | if (isset($merged['path'])) { 128 | $ret .= $merged['path']; 129 | } 130 | 131 | if (isset($prel['query'])) { 132 | $ret .= '?' . 
$prel['query']; 133 | } 134 | 135 | if (isset($prel['fragment'])) { 136 | $ret .= '#' . $prel['fragment']; 137 | } 138 | 139 | return $ret; 140 | } 141 | -------------------------------------------------------------------------------- /tests/cases.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_comment": "A file in the same directory", 4 | "cases": [ 5 | ["http://example.com/foo/bar", "test.jpg", "http://example.com/foo/test.jpg"] 6 | ] 7 | }, 8 | { 9 | "_comment": "A file in a subdirectory", 10 | "cases": [ 11 | ["http://example.com/foo/bar", "images/test.jpg", "http://example.com/foo/images/test.jpg"], 12 | ["http://example.com/foo/bar", "./images/test.jpg", "http://example.com/foo/images/test.jpg"], 13 | ["http://example.com/foo/bar/", "images/test.jpg", "http://example.com/foo/bar/images/test.jpg"], 14 | ["http://example.com/foo/bar/", "./images/test.jpg", "http://example.com/foo/bar/images/test.jpg"] 15 | ] 16 | }, 17 | { 18 | "_comment": "A file in the root directory", 19 | "cases": [ 20 | ["http://example.com/foo/bar", "/test.jpg", "http://example.com/test.jpg"] 21 | ] 22 | }, 23 | { 24 | "_comment": "A file in the parent directory", 25 | "cases": [ 26 | ["http://example.com/foo/bar/baz", "../test.jpg", "http://example.com/foo/test.jpg"] 27 | ] 28 | }, 29 | { 30 | "_comment": "A file more directories up than there are directories to escape", 31 | "cases": [ 32 | ["http://example.com/foo/bar/baz/quux", "../../../../../test.jpg", "http://example.com/../../test.jpg"] 33 | ] 34 | }, 35 | { 36 | "_comment": "The current directory itself", 37 | "cases": [ 38 | ["http://example.com/foo/", ".", "http://example.com/foo/"], 39 | ["http://example.com/foo/", "./", "http://example.com/foo/"], 40 | ["http://example.com/foo", ".", "http://example.com/"], 41 | ["http://example.com/foo", "./", "http://example.com/"] 42 | ] 43 | }, 44 | { 45 | "_comment": "Different server, same scheme", 46 | "cases": [ 47 | 
["http://example.com/foo/bar", "//other.example/test.jpg", "http://other.example/test.jpg"], 48 | ["https://example.com/foo/bar", "//other.example/test.jpg", "https://other.example/test.jpg"] 49 | ] 50 | }, 51 | { 52 | "_comment": "Ensure queries work right", 53 | "cases": [ 54 | ["https://example.com/foo/bar.cgi?hello=goodbye", "moo.cgi?yes=no", "https://example.com/foo/moo.cgi?yes=no"], 55 | ["http://example.com/foo/?qwer=poiu", "bar", "http://example.com/foo/bar"], 56 | ["http://example.com/foo/", "bar?qwer=poiu", "http://example.com/foo/bar?qwer=poiu"], 57 | ["http://example.com/foo/", "?qwer=poiu", "http://example.com/foo/?qwer=poiu"] 58 | ] 59 | }, 60 | { 61 | "_comment": "Users and passwords should transfer for relative links", 62 | "cases": [ 63 | ["http://alicebob:password123@example.com/foo/bar", ".", "http://alicebob:password123@example.com/foo/"], 64 | ["http://alicebob:password123@example.com/foo/bar", "/test/url", "http://alicebob:password123@example.com/test/url"], 65 | ["http://spambot@example.com/foo/bar", "/test/url", "http://spambot@example.com/test/url"] 66 | ] 67 | }, 68 | { 69 | "_comment": "But shouldn't transfer to other servers", 70 | "cases": [ 71 | ["https://alicebob:password123@example.com/foo/bar", "//other.example/test/url", "https://other.example/test/url"] 72 | ] 73 | }, 74 | { 75 | "_comment": "Port specifiers", 76 | "cases": [ 77 | ["https://example.com:8000/foo/bar", "//other.example/test/url", "https://other.example/test/url"], 78 | ["https://example.com:8000/foo/bar", "/test/url", "https://example.com:8000/test/url"] 79 | ] 80 | }, 81 | { 82 | "_comment": "File paths are fiddly", 83 | "cases": [ 84 | ["file:///path/to/file", "other-file", "file:///path/to/other-file"], 85 | ["/path/to/file", "other-file", "/path/to/other-file"] 86 | ] 87 | }, 88 | { 89 | "_comment": "Anchors are too", 90 | "cases": [ 91 | ["http://example.com/test/foo#anchor", "bar", "http://example.com/test/bar"], 92 | ["http://example.com/test/foo", 
"bar#anchor", "http://example.com/test/bar#anchor"], 93 | ["http://example.com", "#anchor", "http://example.com#anchor"] 94 | ] 95 | }, 96 | { 97 | "_comment": "Mixing non-relative and relative url", 98 | "cases": [ 99 | ["http://example.com/foo/bar", "javascript:void(0)", "javascript:void(0)"] 100 | ] 101 | }, 102 | { 103 | "_comment": "Sanity checks", 104 | "cases": [ 105 | ["http://example.com/foo/bar", false, "http://example.com/foo/bar"], 106 | [false, "http://example.com/foo/bar", "http://example.com/foo/bar"] 107 | ] 108 | }, 109 | { 110 | "_comment": "URL already valid (various kinds), base may vary", 111 | "cases": [ 112 | ["https://example.com", "https://example.com", "https://example.com"], 113 | ["https://example.com/", "https://example.com", "https://example.com"], 114 | ["https://example.com/", "https://example.com/", "https://example.com/"], 115 | ["https://example.com/", "https://example.com/#test", "https://example.com/#test"] 116 | ] 117 | }, 118 | { 119 | "_comment": "Same server, different scheme, URL already valid", 120 | "cases": [ 121 | ["http://example.com", "https://example.com", "https://example.com"], 122 | ["https://example.com", "http://example.com", "http://example.com"] 123 | ] 124 | }, 125 | { 126 | "_comment": "Colons in path", 127 | "cases": [ 128 | ["https://example.com", "/user/posts/1907564412639039:0", "https://example.com/user/posts/1907564412639039:0"], 129 | ["https://example.com", "/user/posts/1907564412639039:123", "https://example.com/user/posts/1907564412639039:123"], 130 | ["https://example.com", "/user/posts/1907564412639039:8", "https://example.com/user/posts/1907564412639039:8"], 131 | ["https://example.com", "/user/posts/1907564412639039:abc", "https://example.com/user/posts/1907564412639039:abc"], 132 | ["https://example.com/user/", "posts/1907564412639039:0", "https://example.com/user/posts/1907564412639039:0"], 133 | ["https://example.com/user/", "posts/1907564412639039:456", 
"https://example.com/user/posts/1907564412639039:456"], 134 | ["https://example.com/user/", "posts/1907564412639039:7", "https://example.com/user/posts/1907564412639039:7"], 135 | ["https://example.com/user/", "posts/1907564412639039:def", "https://example.com/user/posts/1907564412639039:def"], 136 | ["https://example.com/user/", "http://example.com/posts/1907564412639039:456", "http://example.com/posts/1907564412639039:456"], 137 | ["https://example.com/user/", "http://example.com:9090/posts/1907564412639039:456", "http://example.com:9090/posts/1907564412639039:456"] 138 | ] 139 | } 140 | ] 141 | -------------------------------------------------------------------------------- /tests/tests.php: -------------------------------------------------------------------------------- 1 | 16 | tests 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | "; 40 | if ($out == $expected) { 41 | echo ''; 42 | } else { 43 | echo ''; 44 | } 45 | echo ''; 46 | } 47 | 48 | foreach(json_decode(file_get_contents(__DIR__ . '/cases.json'), true) as $item) { 49 | foreach($item['cases'] as $case) { 50 | test($case[0], $case[1], $case[2]); 51 | } 52 | } 53 | 54 | ?> 55 |
No.BaseURLExpected / Result
$base$url' . $out . '' . $out . ' (expected: ' . $expected . ')
#!/usr/bin/env python
"""tests/tests.py: cross-check tests/cases.json against Python 2's urljoin.

Each entry of cases.json is an object whose 'cases' list holds
[base, url, expected] triples.  Every triple is passed through
urlparse.urljoin and compared with its expected value.

Exit status is 0 when every case matches and 1 otherwise (including when the
script is run on an interpreter other than Python 2).
"""
import json
import os
import sys

# The `urlparse` module only exists on Python 2; refuse to run elsewhere.
if sys.version_info[0] == 2:
    from urlparse import urljoin
else:
    print("python v2 is expected")
    sys.exit(1)

__DIR__ = os.path.dirname(os.path.realpath(__file__))

bad_cases = []
overall_cases_count = 0

# `with` guarantees the file is closed even if reading or decoding raises.
with open(os.path.join(__DIR__, "cases.json"), "r") as f:
    groups = json.load(f)

for item in groups:
    for case in item['cases']:
        base, url, expected = case[0], case[1], case[2]
        overall_cases_count += 1
        # Join once and reuse the result for both the check and the report.
        actual = urljoin(base, url)
        if actual != expected:
            bad_cases.append([base, url, actual, expected])

if not bad_cases:
    print("OK. {} case(s) read".format(overall_cases_count))
    sys.exit(0)

for base, url, actual, expected in bad_cases:
    print("{} + {} = {} (expected {})".format(base, url, actual, expected))

sys.exit(1)