/**
 * Rebuild the leading "scheme://user:pass@host:port" part of a URL.
 *
 * @param array $parsed_url URL components keyed the way parse_url() returns them.
 * @return string The components that are set, concatenated in URL order;
 *                components that are not set contribute nothing.
 */
protected static function unparse_url_base($parsed_url)
{
    $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
    $host = isset($parsed_url['host']) ? $parsed_url['host'] : '';
    $port = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
    $user = isset($parsed_url['user']) ? (string) $parsed_url['user'] : '';
    $pass = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';

    // Compare against '' instead of relying on truthiness: the string "0" is
    // falsy in PHP, so a user name of "0" would otherwise lose its "@"
    // separator and be glued directly onto the host name.
    $auth = ($user !== '' || $pass !== '') ? $user . $pass . '@' : '';

    return $scheme . $auth . $host . $port;
}

/**
 * Rebuild the trailing "path?query#fragment" part of a URL.
 *
 * @param array $parsed_url URL components keyed the way parse_url() returns them.
 * @return string The components that are set, each with its delimiter.
 */
protected static function unparse_url_request($parsed_url)
{
    $path = isset($parsed_url['path']) ? $parsed_url['path'] : '';
    $query = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
    $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';

    return $path . $query . $fragment;
}
/**
 * Return the proxy base URI stored on this handler.
 *
 * @return string|null Value of the _proxyBaseUri property.
 */
public function getProxyBaseUri()
{
    return $this->_proxyBaseUri;
}

/**
 * Convert a relative URL to an absolute one, using this handler's base URI.
 *
 * Modified version of code found at
 * http://nashruddin.com/PHP_Script_for_Converting_Relative_to_Absolute_URL
 * (taken from miniProxy: https://github.com/joshdick/miniProxy).
 *
 * @param string $rel URL reference as found in the proxied document.
 * @return string Absolute URL. References that already carry a scheme and
 *                "{{...}}" placeholders are returned unchanged.
 */
public function rel2abs($rel)
{
    $rel = (string) $rel;

    // Template placeholders such as {{url}} are passed through untouched.
    if (preg_match(',^{{.*}}$,', $rel))
        return $rel;

    // Only a truly empty reference means "current directory"; empty() would
    // also swallow the perfectly valid relative reference "0".
    if ($rel === '')
        $rel = '.';

    if (parse_url($rel, PHP_URL_SCHEME) != '') {
        // already an absolute URL
        return $rel;
    }
    if (strpos($rel, '//') === 0) {
        // scheme-relative reference: borrow the scheme of the base URI
        return parse_url($this->_baseUri, PHP_URL_SCHEME) . ':' . $rel;
    }

    if ($rel[0] == '#' || $rel[0] == '?')
        return $this->_baseUri . $rel; // queries and anchors

    // Read the components explicitly instead of extract()-ing them into the
    // local scope, so a missing component can never leave a variable undefined.
    $base = parse_url($this->_baseUri);
    if (!is_array($base))
        $base = array();
    $scheme = isset($base['scheme']) ? $base['scheme'] : '';
    $host = isset($base['host']) ? $base['host'] : '';

    // Remove the non-directory element from the base path.
    $path = isset($base['path']) ? preg_replace('#/[^/]*$#', '', $base['path']) : '/';
    if ($rel[0] == '/')
        $path = ''; // root-relative reference replaces the whole base path

    // Keep the port unless it is the default one for http/https.
    $port = '';
    if (isset($base['port'])) {
        $lowerScheme = strtolower($scheme);
        $isDefaultPort = ($lowerScheme == 'http' && $base['port'] == 80)
            || ($lowerScheme == 'https' && $base['port'] == 443);
        if (!$isDefaultPort)
            $port = ':' . $base['port'];
    }

    $auth = '';
    if (isset($base['user'])) {
        $auth = $base['user'];
        if (isset($base['pass']))
            $auth .= ':' . $base['pass'];
        $auth .= '@';
    }

    // Split off "?query#fragment" so that path normalisation below cannot
    // collapse "//" or "/../" sequences inside a query value.
    $cut = strcspn($rel, '?#');
    $relPath = substr($rel, 0, $cut);
    $relSuffix = (string) substr($rel, $cut);

    // The port belongs directly after the host, before the path.
    $abs = $auth . $host . $port . $path . '/' . $relPath;

    // Repeatedly replace '//', '/./' and '/foo/../' with '/' until stable.
    do {
        $abs = preg_replace(array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'), '/', $abs, -1, $n);
    } while ($n > 0);

    return $scheme . '://' . $abs . $relSuffix;
}
/**
 * Replace all children of a DOM node with a single text node.
 *
 * A text node is created explicitly rather than assigning nodeValue
 * (NOTE(review): presumably to avoid entity handling on assignment, as the
 * earlier htmlspecialchars() variant suggests; confirm).
 *
 * @param DOMNode $n     Node whose content is replaced.
 * @param string  $value New text content.
 */
public static function replaceTextContent($n, $value)
{
    while ($n->hasChildNodes()) {
        $n->removeChild($n->firstChild);
    }

    $n->appendChild($n->ownerDocument->createTextNode($value));
}

/**
 * Proxify URL: prefix an absolute http/https/ftp URL with the proxy base URI.
 *
 * @param string     $url         Absolute URL to rewrite.
 * @param array|null $parsed_url  Optional parse_url() result for $url, to avoid re-parsing.
 * @param bool       $is_redirect Not used by this implementation.
 * @return string The proxied URL, or $url unchanged when it is a "{{...}}"
 *                placeholder, already starts with the proxy base URI, or
 *                uses any other scheme.
 */
public function proxifyURL($url, $parsed_url = null, $is_redirect = false)
{
    if (preg_match(',^{{.*}}$,', $url))
        return $url;

    // Byte-wise prefix test; a loose == on two strings may compare them
    // numerically when both happen to look like numbers.
    $proxy_base_uri = (string) $this->getProxyBaseUri();
    if (strncmp((string) $url, $proxy_base_uri, strlen($proxy_base_uri)) === 0)
        return $url;

    if (!$parsed_url)
        $parsed_url = parse_url($url);

    // URL schemes are case-insensitive, and parse_url() keeps the original case.
    $scheme = isset($parsed_url['scheme']) ? strtolower($parsed_url['scheme']) : '';
    if (!in_array($scheme, array('http', 'https', 'ftp'), true))
        return $url;

    return $proxy_base_uri . $url;
}

/**
 * Proxify CSS: rewrite every url(...) reference so it goes through the proxy.
 *
 * taken from miniProxy https://github.com/joshdick/miniProxy
 *
 * @param string $buffer CSS text (a stylesheet or the value of a style attribute).
 * @return string CSS with url(...) targets made absolute and proxified.
 */
protected function proxifyCSS($buffer)
{
    $proxy = $this;
    return preg_replace_callback(
        '/url\((.*?)\)/i',
        function ($matches) use ($proxy) {
            // Whitespace is allowed between the parentheses and the URL.
            $url = trim($matches[1]);

            // Quotes are optional in CSS; remember which one was used so the
            // rewritten URL can be re-quoted (an unquoted URL containing
            // spaces or parentheses would be invalid).
            $quote = '';
            if ($url !== '' && ($url[0] === '"' || $url[0] === "'")) {
                $quote = $url[0];
                $url = trim($url, $quote);
            }

            // Inline data and empty references are not fetchable through the
            // proxy; leave the original text exactly as written.
            if ($url === '' || stripos($url, 'data:') === 0) {
                return $matches[0];
            }

            $new_url = $proxy->proxifyURL($proxy->rel2abs($url));
            return 'url(' . $quote . $new_url . $quote . ')';
        },
        $buffer
    );
}
/**
 * Proxify XMLHttpRequest: inject a client-side script as the first child of $elem.
 *
 * The script defines window.proxifyURL(), wraps
 * window.XMLHttpRequest.prototype.open so AJAX request URLs are made absolute
 * and point back to the proxy, and installs src/href property setters on
 * img, script and a elements that route assigned URLs through proxifyURL().
 * The JavaScript rel2abs() serves the same purpose as the server-side one.
 *
 * Uses code from these sources:
 *   http://stackoverflow.com/questions/7775767/javascript-overriding-xmlhttprequest-open
 *   https://gist.github.com/1088850
 *
 * TODO: This is obviously only useful for browsers that use XMLHttpRequest,
 * but it's better than nothing.
 *
 * NOTE(review): the caller is presumably expected to pass the document's
 * <head> or <body> element and to skip this hack when neither exists
 * (a server may send "text/html" for content that is not HTML); confirm.
 *
 * @param DOMNode $elem Element that receives the script as its first child.
 */
protected function proxifyXMLHttpRequest($elem)
{
    // Encode the PHP values as JavaScript string literals. Splicing them raw
    // between double quotes breaks the script as soon as a URI contains a
    // quote or backslash, and "</script>" would terminate the element early.
    $jsonFlags = JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT;
    $proxyBaseJs = json_encode((string) $this->getProxyBaseUri(), $jsonFlags);
    if ($proxyBaseJs === false)
        $proxyBaseJs = '""'; // value was not valid UTF-8
    $baseUriJs = json_encode((string) $this->_baseUri, $jsonFlags);
    if ($baseUriJs === false)
        $baseUriJs = '""';

    $script = $elem->ownerDocument->createElement("script");
    self::replaceTextContent($script,
'(function() {
    var PROXY_BASE_URI = ' . $proxyBaseJs . ';
    var BASE_URI = ' . $baseUriJs . ';

    window.proxifyURL = function(url) {
        function parseURI(url) {
            var m = String(url).replace(/^\s+|\s+$/g, "").match(/^([^:\/?#]+:)?(\/\/(?:[^:@]*(?::[^:@]*)?@)?(([^:\/?#]*)(?::(\d*))?))?([^?#]*)(\?[^#]*)?(#[\s\S]*)?/);
            // authority = "//" + user + ":" + pass "@" + hostname + ":" port
            return (m ? {
                href     : m[0] || "",
                protocol : m[1] || "",
                authority: m[2] || "",
                host     : m[3] || "",
                hostname : m[4] || "",
                port     : m[5] || "",
                pathname : m[6] || "",
                search   : m[7] || "",
                hash     : m[8] || ""
            } : null);
        }

        function rel2abs(base, href) { // RFC 3986
            function removeDotSegments(input) {
                var output = [];
                input.replace(/^(\.\.?(\/|$))+/, "")
                    .replace(/\/(\.(\/|$))+/g, "/")
                    .replace(/\/\.\.$/, "/../")
                    .replace(/\/?[^\/]*/g, function (p) {
                        if (p === "/..") {
                            output.pop();
                        } else {
                            output.push(p);
                        }
                    });
                return output.join("").replace(/^\//, input.charAt(0) === "/" ? "/" : "");
            }

            base = parseURI(base || "");

            return !href || !base ? null : (href.protocol || base.protocol) +
                (href.protocol || href.authority ? href.authority : base.authority) +
                removeDotSegments(href.protocol || href.authority || href.pathname.charAt(0) === "/" ? href.pathname : (href.pathname ? ((base.authority && !base.pathname ? "/" : "") + base.pathname.slice(0, base.pathname.lastIndexOf("/") + 1) + href.pathname) : base.pathname)) +
                (href.protocol || href.authority || href.pathname ? href.search : (href.search || base.search)) +
                href.hash;
        }

        if (url == null || url == "")
            return url;

        // Callers may pass non-string values (e.g. URL objects) to open().
        var text = String(url);
        if (text.indexOf(PROXY_BASE_URI) === 0)
            return url;

        var href = parseURI(text);
        if (!href)
            return url;

        // URL schemes are case-insensitive.
        var protocol = href.protocol.toLowerCase();
        if (protocol && protocol != "http:" && protocol != "https:" && protocol != "ftp:")
            return url;

        return PROXY_BASE_URI + rel2abs(BASE_URI, href);
    };

    function set_property_descriptor(name, property, descriptor)
    {
        var obj = document.createElement(name);
        try {
            Object.defineProperty(Object.getPrototypeOf(obj), property, descriptor);
        } catch (err) {
            //console.log("Failed to set property descriptor for " + name + " (" + property + ")");
        }
    }

    if (window.XMLHttpRequest) {
        var proxied = window.XMLHttpRequest.prototype.open;
        window.XMLHttpRequest.prototype.open = function() {
            arguments[1] = window.proxifyURL(arguments[1]);
            return proxied.apply(this, [].slice.call(arguments));
        };
    }

    var src_descriptor = {
        get: function() {
            return this.getAttribute("src");
        },
        set: function(val) {
            this.setAttribute("src", window.proxifyURL(val));
        }
    };
    set_property_descriptor("img", "src", src_descriptor);
    set_property_descriptor("script", "src", src_descriptor);

    var href_descriptor = {
        get: function() {
            return this.getAttribute("href");
        },
        set: function(val) {
            this.setAttribute("href", window.proxifyURL(val));
        }
    };
    set_property_descriptor("a", "href", href_descriptor);
})();'
    );
    $script->setAttribute("type", "text/javascript");

    // With a null reference node insertBefore() appends, so this also works
    // for an element that has no children yet.
    $elem->insertBefore($script, $elem->firstChild);
}
function proxifyHTML($buffer) 302 | { 303 | static $html_links = array( 304 | 'a' => 'href', 305 | 'area' => 'href', 306 | 'link' => 'href', 307 | 'img' => array('src', 'longdesc', 'usemap'), 308 | 'object' => array('classid', 'codebase', 'data', 'usemap'), 309 | 'q' => 'cite', 310 | 'blockquote' => 'cite', 311 | 'ins' => 'cite', 312 | 'del' => 'cite', 313 | 'form' => 'action', 314 | 'input' => array('src', 'usemap'), 315 | 'head' => 'profile', 316 | 'base' => 'href', 317 | 'script' => array('src', 'for') 318 | ); 319 | static $html_links_xpath; 320 | if (!$html_links_xpath) { 321 | foreach ($html_links as $e => &$attrs) { 322 | if (is_string($attrs)) 323 | $attrs = array($attrs); 324 | foreach ($attrs as $a) { 325 | if ($html_links_xpath) 326 | $html_links_xpath .= ' | '; 327 | $html_links_xpath .= '//' . $e . '[@' . $a . ']'; 328 | } 329 | } 330 | } 331 | 332 | $detectedEncoding = mb_detect_encoding($buffer, "UTF-8, ISO-8859-1"); 333 | if ($detectedEncoding) { 334 | $buffer = mb_convert_encoding($buffer, "HTML-ENTITIES", $detectedEncoding); 335 | } 336 | 337 | $xpath = $this->loadHTML($buffer); 338 | 339 | // proxify html links 340 | foreach ($xpath->query($html_links_xpath) as $e) { 341 | if (!array_key_exists($e->nodeName, $html_links)) 342 | continue; 343 | 344 | foreach ($html_links[$e->nodeName] as $a) { 345 | $value = $e->getAttribute($a); 346 | if (!$value) 347 | continue; 348 | 349 | $new_value = $this->proxifyURL($this->rel2abs($value)); 350 | if ($new_value != $value) { 351 | $e->setAttribute($a, $new_value); 352 | } 353 | if ($e->nodeName == 'a' && $this->_anchorTarget) { 354 | if (!$e->getAttribute('target')) { 355 | $e->setAttribute('target', $this->_anchorTarget); 356 | } 357 | } 358 | } 359 | } 360 | 361 | // proxify tags with a "style" attribute 362 | foreach ($xpath->query('//*[@style]') as $e) { 363 | $value = $e->getAttribute('style'); 364 | $new_value = $this->proxifyCSS($value); 365 | if ($new_value != $value) { 366 | 
$e->setAttribute('style', $new_value); 367 | } 368 | } 369 | 370 | // proxify