├── .gitignore ├── LICENSE ├── Readme.md ├── composer.json ├── install.php ├── listupdater.php ├── lookup.php └── phpgsb.class.php /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2015, Sam Cleaver 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the organization nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Implementation of Google Safe Browsing 2 | 3 | **phpGSB** is a robust implementation of the Google Safe Browsing API. It currently *does* allow the following: 4 | 5 | * Updating of GSB lists to a MySQL database 6 | * Basic checking of URLs against lists and then full-hash checks against the full GSB database 7 | * Caching of full-hash keys to minimise requests to the remote Google server 8 | 9 | Currently it *does not* allow the following: 10 | 11 | * Requests using MAC keys (integrity checks) 12 | 13 | The main class is definitely not as efficient as it could be and was developed very quickly to meet the basic GSB specification; any contributions, bug fixes, etc. are **very** welcome! 14 | 15 | ## Download 16 | 17 | * Installation using composer 18 | ``` 19 | $ composer require samcleaver/phpgsb 20 | ``` 21 | 22 | * You can manually download phpGSB from this link: https://github.com/samcleaver/phpGSB/archive/0.2.6.zip 23 | 24 | ## Installation 25 | 26 | 1. Enter database details into install.php (Replace DATABASE_USERNAME, DATABASE_NAME and DATABASE_PASSWORD with the respective information) 27 | 2. Run install.php 28 | 3. Look at the listupdater.php and lookup.php example files for basic methods on using the system. 29 | 4. If you choose to use listupdater.php as-is, then set it as a cron job/scheduled task to run every minute. *(It won't actually update every minute, but running it that often is required in case of backoff procedures and timeouts)* 30 | 31 | ## FAQ 32 | 33 | * **When I do a lookup, phpGSB says the URL is safe but I know it's not.** 34 | *The database is updated in chunks from Google's central server. Because of this, you need to run updates for 24 hours before you can start doing lookups; this is a limitation of the specification and not the implementation. 
(Check Step 4 of Installation for how to ensure updates are running.)* 35 | 36 | ## License 37 | 38 | The phpGSB library is released under the New BSD License. 39 | 40 | ``` 41 | Copyright (c) 2010-2015, Sam Cleaver 42 | 43 | Redistribution and use in source and binary forms, with or without 44 | modification, are permitted provided that the following conditions are met: 45 | * Redistributions of source code must retain the above copyright 46 | notice, this list of conditions and the following disclaimer. 47 | * Redistributions in binary form must reproduce the above copyright 48 | notice, this list of conditions and the following disclaimer in the 49 | documentation and/or other materials provided with the distribution. 50 | * Neither the name of the organization nor the 51 | names of its contributors may be used to endorse or promote products 52 | derived from this software without specific prior written permission. 53 | 54 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 55 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 56 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 57 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 58 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 60 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 61 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 62 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
64 | ``` 65 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name":"samcleaver/phpgsb", 3 | "description":"PHP Google Safe Browsing", 4 | "keywords":["php","google","gsf", "google safe browsing"], 5 | "type":"library", 6 | "license":"BSD-3-Clause", 7 | "version": "0.2.7", 8 | "authors":[ 9 | { 10 | "name":"Sam Cleaver", 11 | "homepage":"https://github.com/samcleaver" 12 | } 13 | ], 14 | "homepage": "https://github.com/samcleaver/phpgsb", 15 | "require":{ 16 | "php":">=5.3.0" 17 | }, 18 | "autoload":{ 19 | "classmap": [ 20 | "phpgsb.class.php" 21 | ] 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /install.php: -------------------------------------------------------------------------------- 1 | usinglists = array('googpub-phish-shavar','goog-malware-shavar', 'goog-unwanted-shavar'); 13 | //Install MySQL tables 14 | foreach($phpgsb->usinglists as $value) 15 | { 16 | //Create ADD tables 17 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-a-hosts` ( 18 | `ID` int(255) NOT NULL auto_increment, 19 | `Hostkey` varchar(8) NOT NULL, 20 | `Chunknum` int(255) NOT NULL, 21 | `Count` varchar(2) NOT NULL default '0', 22 | `FullHash` varchar(70) NOT NULL, 23 | PRIMARY KEY (`ID`), 24 | KEY `Hostkey` (`Hostkey`) 25 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); 26 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-a-index` ( 27 | `ChunkNum` int(255) NOT NULL auto_increment, 28 | `Chunklen` int(255) NOT NULL default '0', 29 | PRIMARY KEY (`ChunkNum`) 30 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); 31 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-a-prefixes` ( 32 | `ID` int(255) NOT NULL auto_increment, 33 | `Hostkey` varchar(8) NOT NULL, 34 | `Prefix` varchar(255) NOT NULL, 35 | `FullHash` varchar(70) NOT NULL, 36 | 
PRIMARY KEY (`ID`), 37 | KEY `Hostkey` (`Hostkey`) 38 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); 39 | //Create SUB tables 40 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-s-hosts` ( 41 | `ID` int(255) NOT NULL auto_increment, 42 | `Hostkey` varchar(8) NOT NULL, 43 | `Chunknum` int(255) NOT NULL, 44 | `Count` varchar(2) NOT NULL default '0', 45 | `FullHash` varchar(70) NOT NULL, 46 | PRIMARY KEY (`ID`), 47 | KEY `Hostkey` (`Hostkey`) 48 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); 49 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-s-index` ( 50 | `ChunkNum` int(255) NOT NULL auto_increment, 51 | `Chunklen` int(255) NOT NULL default '0', 52 | PRIMARY KEY (`ChunkNum`) 53 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); 54 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-s-prefixes` ( 55 | `ID` int(255) NOT NULL auto_increment, 56 | `Hostkey` varchar(8) NOT NULL, 57 | `AddChunkNum` varchar(8) NOT NULL, 58 | `Prefix` varchar(255) NOT NULL, 59 | `FullHash` varchar(70) NOT NULL, 60 | PRIMARY KEY (`ID`), 61 | KEY `Hostkey` (`Hostkey`) 62 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); 63 | } 64 | //Check timeout files writable 65 | if(file_put_contents("testfile.dat","TEST PRE-USE PHPGSB ".time())) 66 | unlink("testfile.dat"); 67 | else 68 | echo "ERROR: THIS DIRECTORY IS NOT WRITABLE, CHMOD to 775 or 777"; 69 | ?> -------------------------------------------------------------------------------- /listupdater.php: -------------------------------------------------------------------------------- 1 | apikey = "API_KEY_HERE"; 14 | $phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar', 'goog-unwanted-shavar'); 15 | $phpgsb->runUpdate(); 16 | $phpgsb->close(); 17 | ?> -------------------------------------------------------------------------------- /lookup.php: -------------------------------------------------------------------------------- 1 | apikey = "API_KEY_HERE"; 14 | 
$phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar', 'goog-unwanted-shavar'); 15 | //Should return false (not phishing or malware) 16 | var_dump($phpgsb->doLookup('http://www.google.com')); 17 | //Should return true, malicious URL 18 | var_dump($phpgsb->doLookup('http://www.gumblar.cn')); 19 | $phpgsb->close(); 20 | ?> -------------------------------------------------------------------------------- /phpgsb.class.php: -------------------------------------------------------------------------------- 1 | phpGSB($database, $username, $password, $host, $port, $verbose); 31 | } 32 | function phpGSB($database=false,$username=false,$password=false,$host="localhost",$port=3306,$verbose=true) 33 | { 34 | if(!$verbose) 35 | $this->silent(); 36 | $this->outputmsg("phpGSB Loaded"); 37 | if($database&&$username) 38 | $this->dbConnect($database,$username,$password,$host,$port); 39 | } 40 | function close() 41 | { 42 | mysqli_close($this->dbLink); 43 | $this->outputmsg("Closing phpGSB. 
(Peak Memory: ".(round(memory_get_peak_usage()/1048576,3))."MB)"); 44 | } 45 | function silent() 46 | { 47 | $this->verbose = false; 48 | } 49 | function trans_disable() 50 | { 51 | $this->transenabled = false; 52 | } 53 | function trans_enable() 54 | { 55 | $this->transenabled = true; 56 | } 57 | function trans_begin() 58 | { 59 | if($this->transenabled) 60 | { 61 | $this->transtarted = true; 62 | $this->outputmsg("Begin MySQL Transaction"); 63 | mysqli_query($this->dbLink, "BEGIN"); 64 | } 65 | } 66 | function trans_commit() 67 | { 68 | if($this->transtarted&&mysqli_ping($this->dbLink)&&$this->transenabled) 69 | { 70 | $this->transtarted = false; 71 | $this->outputmsg("Comitting Transaction"); 72 | mysqli_query($this->dbLink, "COMMIT"); 73 | } 74 | } 75 | function trans_rollback() 76 | { 77 | if($this->transtarted&&mysqli_ping($this->dbLink)&&$this->transenabled) 78 | { 79 | $this->transtarted = false; 80 | $this->outputmsg("Rolling Back Transaction"); 81 | mysqli_query($this->dbLink, "ROLLBACK"); 82 | } 83 | } 84 | /*Function to output messages, used instead of echo, 85 | will make it easier to have a verbose switch in later 86 | releases*/ 87 | function outputmsg($msg) 88 | { 89 | if($this->verbose) 90 | { 91 | ob_start(); 92 | echo $msg.'...
'; 93 | $this->ob .= ob_get_contents(); 94 | ob_end_flush(); 95 | } 96 | } 97 | /*Function to output errors, used instead of echo, 98 | will make it easier to have a verbose switch in later 99 | releases*/ 100 | function fatalerror($msg) 101 | { 102 | if($this->verbose) 103 | { 104 | ob_start(); 105 | print_r($msg); 106 | echo '...
'; 107 | $this->ob .= ob_get_contents(); 108 | ob_end_flush(); 109 | } 110 | $this->trans_rollback(); 111 | die(); 112 | } 113 | /*Wrapper to connect to database. Simples.*/ 114 | function dbConnect($database,$username,$password,$host="localhost",$port=3306) 115 | { 116 | $this->dbLink = mysqli_connect($host, $username, $password, '', $port); 117 | if (!$this->dbLink) { 118 | $this->fatalerror('Could not connect: ' . mysqli_error($this->dbLink)); 119 | } 120 | $this->outputmsg('Connected successfully to database server'); 121 | $db_selected = mysqli_select_db($this->dbLink, $database); 122 | if (!$db_selected) { 123 | $this->fatalerror('Can\'t use $database : ' . mysqli_error($this->dbLink)); 124 | } 125 | $this->outputmsg('Connected to database successfully'); 126 | } 127 | public function getDbLink() 128 | { 129 | return $this->dbLink; 130 | } 131 | /*Simple logic function to calculate timeout 132 | based on the number of previous errors*/ 133 | function calc($errors) 134 | { 135 | //According to Developer Guide Formula 136 | if($errors==1) 137 | { 138 | //According to Developer Guide (1st error, wait a minute) 139 | return 60; 140 | } 141 | elseif($errors>5) 142 | { 143 | //According to Developer Guide (Above 5 errors check every 4 hours) 144 | return 28800; 145 | } 146 | else 147 | { 148 | //According to Developer Guide we simply double up our timeout each time and use formula: 149 | //(Adapted to be relative to errors) ( ((2^$errors) * 7.5) * (decimalrand(0,1) + 1)) to produce 150 | // a result between: 120min-240min for example 151 | return floor((pow(2,$errors) * 7.5) * ((rand(0,1000)/1000) + 1)); 152 | } 153 | } 154 | /*Writes backoff timeouts, uses calc() to 155 | calculate timeouts and then writes to file 156 | for next check*/ 157 | function Backoff($errdata=false,$type) 158 | { 159 | if($type=="data") 160 | $file = 'nextcheck.dat'; 161 | else 162 | $file = 'nextcheckl.dat'; 163 | $curstatus = explode('||',file_get_contents($this->pingfilepath.$file)); 
164 | $curstatus[1] = $curstatus[1] + 1; 165 | $seconds = $this->calc($curstatus[1]); 166 | $until = time()+$seconds.'||'.$curstatus[1]; 167 | file_put_contents($this->pingfilepath.$file,$until); 168 | $this->fatalerror(array("Invalid Response... Backing Off",$errdata)); 169 | } 170 | /*Writes timeout from valid requests to nextcheck file*/ 171 | function setTimeout($seconds) 172 | { 173 | if (file_exists($this->pingfilepath.'nextcheck.dat')) { 174 | $curstatus = explode('||',@file_get_contents($this->pingfilepath.'nextcheck.dat')); 175 | $until = time()+$seconds.'||'.$curstatus[1]; 176 | } else { 177 | $until = time()+$seconds.'||'; 178 | } 179 | file_put_contents($this->pingfilepath.'nextcheck.dat',$until); 180 | } 181 | /*Checks timeout in timeout files (usually performed at the 182 | start of script)*/ 183 | function checkTimeout($type) 184 | { 185 | if($type=="data") 186 | $file = 'nextcheck.dat'; 187 | else 188 | $file = 'nextcheckl.dat'; 189 | $curstatus = explode('||',file_get_contents($this->pingfilepath.$file)); 190 | if(time()<$curstatus[0]) 191 | { 192 | $this->fatalerror("Must wait another ".($curstatus[0]-time()). " seconds before another request"); 193 | } 194 | else 195 | $this->outputmsg("Allowed to request"); 196 | } 197 | /*Function downloads from URL's, POST data can be 198 | passed via $options. 
$followbackoff indicates 199 | whether to follow backoff procedures or not*/ 200 | function googleDownloader($url,$options,$followbackoff=false) 201 | { 202 | $ch = curl_init(); 203 | curl_setopt($ch, CURLOPT_URL, $url); 204 | curl_setopt($ch, CURLOPT_HEADER, 0); 205 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 206 | 207 | if(is_array($options)) 208 | curl_setopt_array($ch, $options); 209 | 210 | $data = curl_exec($ch); 211 | $info = curl_getinfo($ch); 212 | curl_close($ch); 213 | if($followbackoff&&$info['http_code']>299) 214 | { 215 | $this->Backoff($info,$followbackoff); 216 | } 217 | return array($info,$data); 218 | } 219 | //UPDATER FUNCTIONS 220 | /*Resets lists database, only called if GSB issues r:resetdatabase*/ 221 | function resetDatabase() 222 | { 223 | //Lord knows why they would EVER issue this request! 224 | if(!empty($this->adminemail)) 225 | mail($this->adminemail,'Reset Database Request Issued','For some crazy unknown reason GSB requested a database reset at '.time()); 226 | foreach($this->usinglists as $value) 227 | { 228 | mysqli_query($this->dbLink, "TRUNCATE TABLE `$value-s-index`"); 229 | mysqli_query($this->dbLink, "TRUNCATE TABLE `$value-s-hosts`"); 230 | mysqli_query($this->dbLink, "TRUNCATE TABLE `$value-s-prefixes`"); 231 | mysqli_query($this->dbLink, "TRUNCATE TABLE `$value-a-index`"); 232 | mysqli_query($this->dbLink, "TRUNCATE TABLE `$value-a-hosts`"); 233 | mysqli_query($this->dbLink, "TRUNCATE TABLE `$value-a-prefixes`"); 234 | } 235 | } 236 | /*Processes data recieved from a GSB data request into a managable array*/ 237 | function processChunks($fulldata,$listname) 238 | { 239 | $subarray = array(); 240 | $addarray = array(); 241 | $loaddata = trim($fulldata); 242 | $clonedata = $loaddata; 243 | while(strlen($clonedata)>0) 244 | { 245 | $splithead = explode("\n",$clonedata,2); 246 | $chunkinfo = explode(':',$splithead[0]); 247 | $type = $chunkinfo[0]; 248 | $chunknum = $chunkinfo[1]; 249 | $hashlen = $chunkinfo[2]; 250 | 
$chunklen = $chunkinfo[3]; 251 | if($chunklen>0) 252 | { 253 | $tmparray = array(); 254 | //Convert to hex for easy processing 255 | //First get chunkdata according to length 256 | $chunkdata = bin2hex(substr($splithead[1],0,$chunklen)); 257 | if($type=='a') 258 | { 259 | $maini = 0; 260 | while(strlen($chunkdata)>0) 261 | { 262 | $tmparray[$maini]['HOSTKEY'] = substr($chunkdata, 0, 8); 263 | $tmparray[$maini]['COUNT'] = substr($chunkdata, 8, 2); 264 | $chunkdata = substr($chunkdata,10); 265 | $realcount = hexdec($tmparray[$maini]['COUNT']); 266 | if($realcount>0) 267 | { 268 | for ($i = 0; $i < $realcount; $i++) { 269 | $tmparray[$maini]['PAIRS'][$i]['PREFIX'] = substr($chunkdata, 0, ($hashlen*2)); 270 | $chunkdata = substr($chunkdata,(($hashlen*2))); 271 | } 272 | } 273 | elseif($realcount<0) 274 | { 275 | $this->fatalerror(array("Decoding Error, Somethings gone wrong!",$tmparray[$maini])); 276 | } 277 | $maini++; 278 | } 279 | $addarray['CHUNKNUM'] = $chunknum; 280 | $addarray['HASHLEN'] = $hashlen; 281 | $addarray['CHUNKLEN'] = $chunklen; 282 | $addarray['REAL'] = $tmparray; 283 | $this->saveChunkPart($addarray,"ADD",$listname); 284 | unset($addarray); 285 | } 286 | elseif($type=='s') 287 | { 288 | $maini = 0; 289 | while(strlen($chunkdata)>0) 290 | { 291 | $tmparray[$maini]['HOSTKEY'] = substr($chunkdata, 0, 8); 292 | $tmparray[$maini]['COUNT'] = substr($chunkdata, 8, 2); 293 | $chunkdata = substr($chunkdata,10); 294 | $realcount = hexdec($tmparray[$maini]['COUNT']); 295 | if($realcount>0) 296 | { 297 | for ($i = 0; $i < $realcount; $i++) { 298 | $tmparray[$maini]['PAIRS'][$i]['ADDCHUNKNUM'] = substr($chunkdata, 0, 8); 299 | $tmparray[$maini]['PAIRS'][$i]['PREFIX'] = substr($chunkdata, 8, ($hashlen*2)); 300 | $chunkdata = substr($chunkdata,(($hashlen*2)+8)); 301 | } 302 | } 303 | elseif($realcount==0) 304 | { 305 | $tmparray[$maini]['PAIRS'][0]['ADDCHUNKNUM'] = substr($chunkdata, 0, 8); 306 | $chunkdata = substr($chunkdata, 8); 307 | } 308 | else 309 | { 310 | 
$this->fatalerror(array("Decoding Error, Somethings gone wrong!",$tmparray[$maini])); 311 | } 312 | $maini++; 313 | } 314 | $subarray['CHUNKNUM'] = $chunknum; 315 | $subarray['HASHLEN'] = $hashlen; 316 | $subarray['CHUNKLEN'] = $chunklen; 317 | $subarray['REAL'] = $tmparray; 318 | $this->saveChunkPart($subarray,"SUB",$listname); 319 | unset($subarray); 320 | } 321 | else 322 | { 323 | $this->outputmsg("DISCARDED CHUNKNUM: $chunknum (Had no valid label)"); 324 | } 325 | } 326 | else 327 | { 328 | //No ChunkData, Still Insert 329 | if($type=='a') 330 | { 331 | $addarray['CHUNKNUM'] = $chunknum; 332 | $addarray['HASHLEN'] = $hashlen; 333 | $addarray['CHUNKLEN'] = $chunklen; 334 | $this->saveChunkPart($addarray,"ADD",$listname); 335 | unset($addarray); 336 | } 337 | elseif($type=='s') 338 | { 339 | $subarray['CHUNKNUM'] = $chunknum; 340 | $subarray['HASHLEN'] = $hashlen; 341 | $subarray['CHUNKLEN'] = $chunklen; 342 | $this->saveChunkPart($subarray,"SUB",$listname); 343 | unset($subarray); 344 | } 345 | else 346 | { 347 | $this->outputmsg("DISCARDED CHUNKNUM: $chunknum (Empty)"); 348 | } 349 | } 350 | $clonedata = substr($splithead[1],$chunklen); 351 | } 352 | return true; 353 | } 354 | /*Saves processed data to the MySQL database*/ 355 | function saveChunkPart($data,$type,$listname) 356 | { 357 | $listname = trim($listname); 358 | //Check what type of data it is... 
359 | $buildindex = array(); 360 | $buildhost = array(); 361 | $buildpairs = array(); 362 | if($type=="SUB") 363 | { 364 | $value = $data; 365 | if(!isset($this->mainlist['s'][$listname][$value['CHUNKNUM']])) 366 | { 367 | $this->mainlist['s'][$listname][$value['CHUNKNUM']] = true; 368 | $buildindex[] = "('{$value['CHUNKNUM']}','{$value['CHUNKLEN']}')"; 369 | if($value['CHUNKLEN']>0) 370 | { 371 | foreach($value['REAL'] as $newkey=>$newvalue) 372 | { 373 | $buildhost[] = "('{$newvalue['HOSTKEY']}','{$value['CHUNKNUM']}','{$newvalue['COUNT']}','')"; 374 | if(isset($newvalue['PAIRS'])&&count($newvalue['PAIRS'])>0) 375 | { 376 | foreach($newvalue['PAIRS'] as $innerkey=>$innervalue) 377 | { 378 | if( isset($innervalue['PREFIX']) ) { 379 | $buildpairs[] = "('{$newvalue['HOSTKEY']}','{$innervalue['ADDCHUNKNUM']}','{$innervalue['PREFIX']}','')"; 380 | } else { 381 | $buildpairs[] = "('{$newvalue['HOSTKEY']}','{$innervalue['ADDCHUNKNUM']}','','')"; 382 | } 383 | } 384 | } 385 | } 386 | } 387 | } 388 | } 389 | else if($type=="ADD") 390 | { 391 | //Then lets insert add data 392 | $value = $data; 393 | if(!isset($this->mainlist['a'][$listname][$value['CHUNKNUM']])) 394 | { 395 | $this->mainlist['a'][$listname][$value['CHUNKNUM']] = true; 396 | $buildindex[] = "('{$value['CHUNKNUM']}','{$value['CHUNKLEN']}')"; 397 | if($value['CHUNKLEN']>0) 398 | { 399 | foreach($value['REAL'] as $newkey=>$newvalue) 400 | { 401 | $buildhost[] = "('{$newvalue['HOSTKEY']}','{$value['CHUNKNUM']}','{$newvalue['COUNT']}','')"; 402 | if(isset($newvalue['PAIRS'])&&count($newvalue['PAIRS'])>0) 403 | { 404 | foreach($newvalue['PAIRS'] as $innerkey=>$innervalue) 405 | { 406 | if( isset($innervalue['PREFIX']) ) { 407 | $buildpairs[] = "('{$newvalue['HOSTKEY']}','{$innervalue['PREFIX']}','')"; 408 | } else { 409 | $buildpairs[] = "('{$newvalue['HOSTKEY']}','','')"; 410 | } 411 | } 412 | } 413 | } 414 | } 415 | } 416 | } 417 | if(count($buildindex)>0) 418 | { 419 | if($type=="ADD") 420 | $listtype = 'a'; 
421 | elseif($type=="SUB") 422 | $listtype = 's'; 423 | //Insert index value 424 | $indexinsert = implode(', ',$buildindex); 425 | $indexins = mysqli_query($this->dbLink, "INSERT INTO `$listname-$listtype-index` (`ChunkNum`,`Chunklen`) VALUES $indexinsert;"); 426 | $error = mysqli_error($this->dbLink); 427 | if($indexins) 428 | { 429 | if(count($buildhost)>0) 430 | { 431 | //Insert hostkeys index 432 | $hostinsert = implode(', ',$buildhost); 433 | mysqli_query($this->dbLink, "INSERT INTO `$listname-$listtype-hosts` (`Hostkey`,`Chunknum`,`Count`,`FullHash`) VALUES $hostinsert;"); 434 | $error = mysqli_error($this->dbLink); 435 | if(!empty($error)) 436 | $this->outputmsg("INSERTED $listname $type HOST KEYS ".mysqli_error($this->dbLink)); 437 | } 438 | if(count($buildpairs)>0) 439 | { 440 | //Insert prefixes 441 | $pairinsert = implode(', ',$buildpairs); 442 | if($type=="ADD") 443 | mysqli_query($this->dbLink, "INSERT INTO `$listname-$listtype-prefixes` (`Hostkey`,`Prefix`,`FullHash`) VALUES $pairinsert;"); 444 | elseif($type=="SUB") 445 | mysqli_query($this->dbLink, "INSERT INTO `$listname-$listtype-prefixes` (`Hostkey`,`AddChunkNum`,`Prefix`,`FullHash`) VALUES $pairinsert;"); 446 | $error = mysqli_error($this->dbLink); 447 | if(!empty($error)) 448 | $this->outputmsg("INSERTED $listname $type PREFIX HOST KEYS ".mysqli_error($this->dbLink)); 449 | } 450 | } 451 | elseif(!empty($error)) 452 | $this->outputmsg("COULD NOT SAVE $listname $type INDEXS ".mysqli_error($this->dbLink)); 453 | } 454 | } 455 | /*Get ranges of existing chunks from a requested list 456 | and type (add [a] or sub [s] return them and set 457 | mainlist to recieved for that chunk (prevent dupes)*/ 458 | function getRanges($listname,$mode) 459 | { 460 | $checktable = $listname.'-'.$mode.'-index'; 461 | $results = mysqli_query($this->dbLink, "SELECT ChunkNum FROM `$checktable` ORDER BY `ChunkNum` ASC"); 462 | $ranges = array(); 463 | $i = 0; 464 | $start = 0; 465 | while ($row = 
mysqli_fetch_array($results, MYSQLI_BOTH)) 466 | { 467 | $this->mainlist[$mode][$listname][$row['ChunkNum']] = true; 468 | if($i==0) 469 | { 470 | $start = $row['ChunkNum']; 471 | $previous = $row['ChunkNum']; 472 | } 473 | else 474 | { 475 | $expected = $previous + 1; 476 | if($row['ChunkNum']!=$expected) 477 | { 478 | if($start==$previous) 479 | $ranges[] = $start; 480 | else 481 | $ranges[] = $start.'-'.$previous; 482 | $start = $row['ChunkNum']; 483 | } 484 | $previous = $row['ChunkNum']; 485 | } 486 | $i++; 487 | } 488 | if($start>0&&$previous>0) 489 | { 490 | if($start==$previous) 491 | $ranges[] = $start; 492 | else 493 | $ranges[] = $start.'-'.$previous; 494 | } 495 | return $ranges; 496 | } 497 | /*Get both add and sub ranges for a requested list*/ 498 | function getFullRanges($listname) 499 | { 500 | $subranges = $this->getRanges($listname,'s'); 501 | $addranges = $this->getRanges($listname,'a'); 502 | return array("Subranges"=>$subranges,"Addranges"=>$addranges); 503 | } 504 | /*Format a full request body for a desired list including 505 | name and full ranges for add and sub*/ 506 | function formattedRequest($listname) 507 | { 508 | $fullranges = $this->getFullRanges($listname); 509 | $buildpart = ''; 510 | if(count($fullranges['Subranges'])>0) 511 | $buildpart .= 's:'.implode(',',$fullranges['Subranges']); 512 | if(count($fullranges['Subranges'])>0&&count($fullranges['Addranges'])>0) 513 | $buildpart .= ':'; 514 | if(count($fullranges['Addranges'])>0) 515 | $buildpart .= 'a:'.implode(',',$fullranges['Addranges']); 516 | return $listname.';'.$buildpart."\n"; 517 | } 518 | /*Called when GSB returns a SUB-DEL or ADD-DEL response*/ 519 | function deleteRange($range,$mode,$listname) 520 | { 521 | $buildtrunk = $listname.'-'.$mode; 522 | if(substr_count($range,'-')>0) 523 | { 524 | $deleterange = explode('-',trim($range)); 525 | $clause = "`ChunkNum` >= '{$deleterange[0]}' AND `ChunkNum` <= '{$deleterange[1]}'"; 526 | } 527 | else 528 | $clause = "`ChunkNum` 
= '$range'"; 529 | //Delete from index 530 | mysqli_query($this->dbLink, "DELETE FROM `$buildtrunk-index` WHERE $clause"); 531 | 532 | //Select all host keys that match chunks (we'll delete them after but we need the hostkeys list!) 533 | $result = mysqli_query($this->dbLink, "SELECT `Hostkey` FROM `$buildtrunk-hosts` WHERE $clause"); 534 | $buildprefixdel = array(); 535 | if($result&&mysqli_num_rows($result)>0) 536 | { 537 | while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC)) 538 | { 539 | if(!empty($row['Hostkey'])) 540 | $buildprefixdel[] = $row['Hostkey']; 541 | } 542 | if (count($buildprefixdel)) { 543 | //Delete all matching hostkey prefixes 544 | mysqli_query( 545 | $this->dbLink, 546 | "DELETE FROM `$buildtrunk-prefixes` WHERE `Hostkey` in ('" . implode('\',\'', $buildprefixdel) . "')" 547 | ); 548 | } 549 | 550 | //Delete all matching hostkeys 551 | mysqli_query($this->dbLink, "DELETE FROM `$buildtrunk-hosts` WHERE $clause"); 552 | } 553 | } 554 | /*Main part of updater function, will call all other functions, merely requires 555 | the request body, it will then process and save all data as well as checking 556 | for ADD-DEL and SUB-DEL, runs silently so won't return anything on success*/ 557 | function getData($body) 558 | { 559 | if(empty($body)) 560 | $this->fatalerror("Missing a body for data request"); 561 | $this->trans_begin(); 562 | $buildopts = array(CURLOPT_POST=>true,CURLOPT_POSTFIELDS=>$body."\n"); 563 | $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/downloads?client=api&apikey=".$this->apikey."&appver=".$this->version."&pver=".$this->apiversion,$buildopts,"data"); 564 | preg_match('/^n:(.*)$/m', $result[1], $match); 565 | $timeout = $match[1]; 566 | $this->setTimeout($timeout); 567 | if(substr_count($result[1],'r:pleasereset')>0) 568 | $this->resetDatabase(); 569 | else 570 | { 571 | $formattedlist = array(); 572 | if(substr_count($result[1],'i:')>0) 573 | { 574 | $splitlists = 
explode('i:',$result[1]); 575 | unset($splitlists[0]); 576 | foreach($splitlists as $key=>$value) 577 | { 578 | $listdata = explode("\n",trim($value)); 579 | $listname = $listdata[0]; 580 | unset($listdata[0]); 581 | $formattedlist[$listname] = $listdata; 582 | } 583 | foreach($formattedlist as $key=>$value) 584 | { 585 | $listname = $key; 586 | foreach($value as $keyinner=>$valueinner) 587 | { 588 | if(substr_count($valueinner,"u:")>0) 589 | { 590 | $chunkdata = $this->googleDownloader('http://'.trim(str_replace('u:','',$valueinner)),false,"data"); 591 | $processed = $this->processChunks($chunkdata[1],$listname); 592 | $this->outputmsg("Saved a chunk file"); 593 | } 594 | elseif(substr_count($valueinner,"ad:")>0) 595 | { 596 | if(substr_count($valueinner,',')>0) 597 | { 598 | $valueinner = explode(',',trim(str_replace("ad:","",$valueinner))); 599 | foreach($valueinner as $keyadd=>$valueadd) 600 | { 601 | $this->deleteRange($valueadd,'a',$listname); 602 | } 603 | } 604 | else 605 | $this->deleteRange(trim(str_replace("ad:","",$valueinner)),'a',$listname); 606 | } 607 | elseif(substr_count($valueinner,"sd:")>0) 608 | { 609 | if(substr_count($valueinner,',')>0) 610 | { 611 | $valueinner = explode(',',trim(str_replace("sd:","",$valueinner))); 612 | foreach($valueinner as $keyadd=>$valueadd) 613 | { 614 | $this->deleteRange($valueadd,'s',$listname); 615 | } 616 | } 617 | else 618 | $this->deleteRange(trim(str_replace("sd:","",$valueinner)),'s',$listname); 619 | } 620 | } 621 | 622 | } 623 | } 624 | else 625 | { 626 | $this->outputmsg('No data available in list'); 627 | } 628 | } 629 | $this->trans_commit(); 630 | return true; 631 | } 632 | /*Shortcut to run updater*/ 633 | function runUpdate() 634 | { 635 | $this->checkTimeout('data'); 636 | $require = ""; 637 | foreach($this->usinglists as $value) 638 | $require .= $this->formattedRequest($value); 639 | $this->outputmsg("Using $require"); 640 | $this->getData($require); 641 | } 642 | //LOOKUP FUNCTIONS 643 | /*Used to 
check the canonicalize function: runs Canonicalize() over a fixed table
    of input => expected pairs and reports PASSED / INVALID for each via outputmsg().*/
    function validateMethod()
    {
        //Input => Expected
        $cases = array(
            "http://host/%25%32%35" => "http://host/%25",
            "http://host/%25%32%35%25%32%35" => "http://host/%25%25",
            "http://host/%2525252525252525" => "http://host/%25",
            "http://host/asdf%25%32%35asd" => "http://host/asdf%25asd",
            "http://host/%%%25%32%35asd%%" => "http://host/%25%25%25asd%25%25",
            "http://www.google.com/" => "http://www.google.com/",
            "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/" => "http://168.188.99.26/.secure/www.ebay.com/",
            "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/" => "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
            "http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B" => 'http://host%23.com/~a!b@c%23d$e%25f^00&11*22(33)44_55+',
            "http://3279880203/blah" => "http://195.127.0.11/blah",
            "http://www.google.com/blah/.." => "http://www.google.com/",
            "www.google.com/" => "http://www.google.com/",
            "www.google.com" => "http://www.google.com/",
            "http://www.evil.com/blah#frag" => "http://www.evil.com/blah",
            "http://www.GOOgle.com/" => "http://www.google.com/",
            "http://www.google.com.../" => "http://www.google.com/",
            "http://www.google.com/foo\tbar\rbaz\n2" => "http://www.google.com/foobarbaz2",
            "http://www.google.com/q?" => "http://www.google.com/q?",
            "http://www.google.com/q?r?" => "http://www.google.com/q?r?",
            "http://www.google.com/q?r?s" => "http://www.google.com/q?r?s",
            "http://evil.com/foo#bar#baz" => "http://evil.com/foo",
            "http://evil.com/foo;" => "http://evil.com/foo;",
            "http://evil.com/foo?bar;" => "http://evil.com/foo?bar;",
            "http://\x01\x80.com/" => "http://%01%80.com/",
            "http://notrailingslash.com" => "http://notrailingslash.com/",
            "http://www.gotaport.com:1234/" => "http://www.gotaport.com:1234/",
            " http://www.google.com/ " => "http://www.google.com/",
            "http:// leadingspace.com/" => "http://%20leadingspace.com/",
            "http://%20leadingspace.com/" => "http://%20leadingspace.com/",
            "%20leadingspace.com/" => "http://%20leadingspace.com/",
            "https://www.securesite.com/" => "https://www.securesite.com/",
            "http://host.com/ab%23cd" => "http://host.com/ab%23cd",
            "http://host.com//twoslashes?more//slashes" => "http://host.com/twoslashes?more//slashes"
            );
        foreach($cases as $key=>$value)
        {
            $canit = $this->Canonicalize($key);
            $canit = $canit['GSBURL'];
            //Both sides are strings; strict comparison avoids numeric-string juggling
            if($canit===$value)
                $this->outputmsg("PASSED: $key");
            else
                $this->outputmsg("INVALID:
ORIGINAL: $key
EXPECTED: $value
RECIEVED: $canit
");
        }
    }
    /*Special thanks Steven Levithan (stevenlevithan.com) for the ridiculously complicated regex
    required to parse urls. This is used over parse_url as it robustly provides access to
    port, userinfo etc and handles mangled urls very well.
    Expertly integrated into phpGSB by Sam Cleaver ;)
    Thanks to mikegillis677 for finding the seg. fault issue in the old function.
    Passed validateMethod() check on 17/01/12

    Splits $url into its components. Returns an array with the keys source, scheme,
    authority, userinfo, user, password, host, port, relative, path, drive, directory,
    file, query and fragment; any component the regex did not capture is ''.*/
    function j_parseUrl($url)
    {
        $strict = '/^(?:([^:\/?#]+):)?(?:\/\/\/?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?(((?:\/(\w:))?((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/';
        $loose = '/^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/\/?)?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?)(((?:\/(\w:))?(\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/';
        $match = array();
        preg_match($loose, $url, $match);
        if(empty($match))
        {
            //As odd as it sounds, we fall back to strict (technically it is more correct and so may salvage completely mangled urls)
            $match = array();
            preg_match($strict, $url, $match);
        }
        //The order of these keys is the order of the regex capture groups (index 0 = whole match)
        $parts = array("source"=>'',"scheme"=>'',"authority"=>'',"userinfo"=>'',"user"=>'',"password"=>'',"host"=>'',"port"=>'',"relative"=>'',"path"=>'',"drive"=>'',"directory"=>'',"file"=>'',"query"=>'',"fragment"=>'');
        $names = array_keys($parts);
        //preg_match drops trailing groups that did not take part in the match, so only copy what is there
        foreach($match as $index=>$captured)
        {
            if(isset($names[$index]))
                $parts[$names[$index]] = $captured;
        }
        return $parts;
    }
    /*Regex to check if its a numerical IP address (dotted decimal quad whose first octet is
    1-255). Returns the preg_match() result (1 on match, 0 otherwise).*/
    function is_ip($ip)
    {
        return preg_match("/^([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])" .
                "(\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}$/", $ip);
    }
    /*Checks if input is in hex format.
    Deliberately conservative: only a lowercase hex string that starts with a letter (a-f),
    or the single digit "0", counts as hex, so that plain decimal numbers are never mistaken
    for hex. The previous implementation got the same effect from ($x+3)==3, which throws a
    TypeError for non-numeric strings on PHP 8.
    Returns (bool).*/
    function is_hex($x)
    {
        if(!is_string($x)&&!is_int($x))
            return false;
        $x = (string)$x;
        if($x==='0')
            return true;
        return preg_match('/^[a-f][0-9a-f]*$/D',$x)===1;
    }
    /*Checks if input is in octal format: a leading 0 followed only by the digits 0-7.
    Returns (bool).*/
    function is_octal($x)
    {
        if(!is_string($x)&&!is_int($x))
            return false;
        //In IP addressing octals must begin with a 0 to denote octal
        return preg_match('/^0[0-7]*$/D',(string)$x)===1;
    }
    /*Converts hex ("0x" prefixed) or octal (leading "0") input into decimal.
    Returns the decimal value, or (bool) false when the input is neither. Note that 0 is a
    valid result, so callers must compare against false with ===.*/
    function hexoct2dec($value)
    {
        if(!is_string($value)&&!is_int($value))
            return false;
        $value = (string)$value;
        //As this deals with parts in IP's we can be more exclusive
        if(preg_match('/^0x[0-9a-f]+$/Di',$value)===1)
            return hexdec(substr($value,2));
        if($this->is_octal($value))
            return octdec($value);
        return false;
    }
    /*Converts IP address part in HEX to decimal*/
    function iphexdec($hex)
    {
        //Removes any 0x (used to denote hex) and then any leading 0's
        $temp = str_replace('0x','',$hex);
        $temp = ltrim($temp,"0");
        return hexdec($temp);
    }
    /*Converts full IP address in HEX to decimal.
    Accepts the address with or without a leading "0x". Returns the dotted decimal IP as a
    string, or (bool) false if the input is not recognised as hex (see is_hex()) or does not
    describe an address accepted by is_ip().*/
    function hexIPtoIP($hex)
    {
        $tempip = (string)$hex;
        //Remove a leading hex identifier and leading 0's (not significant)
        if(substr($tempip,0,2)==='0x')
            $tempip = (string)substr($tempip,2);
        $tempip = ltrim($tempip,"0");
        //It might be hex
        if(!$this->is_hex($tempip))
            return false;
        //There may be a load of junk before the part we need
        if(strlen($tempip)>8)
            $tempip = substr($tempip,-8);
        //Normalise to exactly four byte pairs
        $tempip = str_pad($tempip,8,"0",STR_PAD_LEFT);
        $octets = array();
        foreach(str_split($tempip,2) as $pair)
            $octets[] = hexdec($pair);
        $newip = implode('.',$octets);
        //Now check if its an IP
        if($this->is_ip($newip))
            return $newip;
        return false;
    }
    /*Checks if an IP provided in either hex, octal or decimal is in fact
    an IP address. Normalises to a four part IP address.
    Handles, in order: a plain dotted quad; a whole address in hex (hexIPtoIP()); and
    one to four dot separated parts, each in decimal, octal (leading 0) or hex (leading 0x),
    where the last part fills all remaining bytes (so "3279880203" and "195.127.11" both work).
    Returns the normalised IP as a string or (bool) false.*/
    function isValid_IP($ip)
    {
        $ip = (string)$ip;
        //First do a simple check, if it passes this no more needs to be done
        if($this->is_ip($ip))
            return $ip;

        //Perhaps its all in hex?
        $checkhex = $this->hexIPtoIP($ip);
        if($checkhex)
            return $checkhex;

        //Otherwise every part must be a number in decimal, octal or hex
        $ipcomponents = explode('.',$ip);
        if(count($ipcomponents)>4)
            return false;
        $numbers = array();
        foreach($ipcomponents as $component)
        {
            $number = $this->hexoct2dec($component);
            if($number===false)
            {
                //Not hex or octal, so only a plain decimal is acceptable
                if(preg_match('/^[1-9][0-9]*$/D',$component)!==1)
                    return false;
                $number = $component+0;
            }
            $numbers[] = $number;
        }
        //All parts but the last are single bytes; the last one (a DWORD when it stands
        //alone) supplies however many bytes are still missing
        $last = array_pop($numbers);
        foreach($numbers as $number)
        {
            if($number>255)
                return false;
        }
        $missing = 4-count($numbers);
        if($last>=pow(256,$missing))
            return false;
        //Float arithmetic keeps this correct on 32 bit builds where a DWORD overflows int
        for($shift=$missing-1;$shift>=0;$shift--)
            $numbers[] = (int)fmod(floor($last/pow(256,$shift)),256);
        $newip = implode('.',$numbers);

        //Now check if its valid
        if($this->is_ip($newip))
            return $newip;

        //Well its not an IP that we can recognise... theres only so much we can do!
        return false;
    }
    /*Had to write another layer as built in PHP urlencode() escapes all non
    alpha-numeric. This only percent-encodes bytes that are <= 32 or >= 127, plus '%' and
    (unless $ignorehash is true) '#'. Everything else is left untouched.
    Returns the encoded string; empty input is returned as given.*/
    function flexURLEncode($url,$ignorehash=false)
    {
        $text = (string)$url;
        $length = strlen($text);
        if($length==0)
            return $url;
        $encoded = '';
        for($i=0;$i<$length;$i++)
        {
            $char = $text[$i];
            $ascii = ord($char);
            if($ascii<=32||$ascii>=127||($char=='#'&&!$ignorehash)||$char=='%')
                $encoded .= rawurlencode($char);
            else
                $encoded .= $char;
        }
        return $encoded;
    }
    /*Canonicalize a full URL according to Google's definition.
    Returns array("GSBURL"=>canonical url without userinfo/fragment,
                  "CleanURL"=>same but including userinfo and fragment,
                  "Parts"=>array("Host","Path","Query","IP"=>(bool) host is an IP)).*/
    function Canonicalize($url)
    {
        //Remove line feeds, carriage returns, tabs, vertical tabs
        $finalurl = trim(str_replace(array("\x09","\x0A","\x0D","\x0B"),'',$url));
        //URL Encode for easy extraction
        $finalurl = $this->flexURLEncode($finalurl,true);
        //Now extract hostname & path
        $parts = $this->j_parseUrl($finalurl);
        $hostname = $parts['host'];
        $path = $parts['path'];
        $query = $parts['query'];
        $lasthost = "";
        $lastpath = "";
        $lastquery = "";
        //Remove all hex coding (loops max of 50 times to stop craziness but should never
        //reach that)
        for ($i = 0; $i < 50; $i++) {
            $hostname = rawurldecode($hostname);
            $path = rawurldecode($path);
            $query = rawurldecode($query);
            if($hostname==$lasthost&&$path==$lastpath&&$query==$lastquery)
                break;
            $lasthost = $hostname;
            $lastpath = $path;
            $lastquery = $query;
        }
        //Deal with hostname first
        //Remove all leading and trailing dots
        $hostname = trim($hostname,'.');
        //Replace all consecutive dots with one dot
        $hostname = preg_replace("/\.{2,}/",".",$hostname);
        //Make it lowercase
        $hostname = strtolower($hostname);
        //See if its a valid IP
        $hostnameip = $this->isValid_IP($hostname);
        if($hostnameip)
        {
            $usingip = true;
            $usehost = $hostnameip;
        }
        else
        {
            $usingip = false;
            $usehost = $hostname;
        }
        //The developer guide has lowercasing and validating IP other way round but its more efficient to
        //have it this way
        //Now we move onto canonicalizing the path: resolve "." and "..", collapse empty segments.
        //A stack is used so ".." always removes the last segment actually kept, and segments
        //are compared with === so a literal "0" segment is not mistaken for an empty one.
        $kept = array();
        foreach(explode('/',$path) as $segment)
        {
            if($segment==="..")
                array_pop($kept);
            elseif($segment!=="."&&$segment!=="")
                $kept[] = $segment;
        }
        if(substr($path,-1,1)=="/")
            $append = "/";
        else
            $append = false;
        $path = "/".implode("/",$kept);
        if($append&&substr($path,-1,1)!="/")
            $path .= $append;
        $usehost = $this->flexURLEncode($usehost);
        $path = $this->flexURLEncode($path);
        $query = $this->flexURLEncode($query);
        if(empty($parts['scheme']))
            $parts['scheme'] = 'http';
        $canurl = $parts['scheme'].'://';
        $realurl = $canurl;
        if($parts['userinfo']!=='')
            $realurl .= $parts['userinfo'].'@';
        $canurl .= $usehost;
        $realurl .= $usehost;
        if($parts['port']!=='')
        {
            $canurl .= ':'.$parts['port'];
            $realurl .= ':'.$parts['port'];
        }
        $canurl .= $path;
        $realurl .= $path;
        //Only a '?' in front of the fragment starts a query
        $hashpos = strpos($finalurl,'#');
        $beforefragment = ($hashpos===false) ? $finalurl : substr($finalurl,0,$hashpos);
        if(strpos($beforefragment,'?')!==false)
        {
            //Use the canonical (fully decoded, then re-encoded) query; the raw parsed one
            //has had every '%' encoded once more by the extraction step above
            $canurl .= '?'.$query;
            $realurl .= '?'.$query;
        }
        if($parts['fragment']!=='')
            $realurl .= '#'.$parts['fragment'];
        return array("GSBURL"=>$canurl,"CleanURL"=>$realurl,"Parts"=>array("Host"=>$usehost,"Path"=>$path,"Query"=>$query,"IP"=>$usingip));
    }
    /*SHA-256 input (short method). Returns the lowercase hex digest.*/
    function sha256($data)
    {
        return hash('sha256',$data);
    }
    /*Make Hostkeys for use in a lookup.
    For an IP the only candidate is "ip/". For a hostname with more than two components the
    candidates are its last three and its last two components, otherwise the host itself.
    Returns an array keyed by full hash of array("Host","Prefix" (first 8 hex chars),"Hash").*/
    function makeHostKey($host,$usingip)
    {
        if($usingip)
            $hosts = array($host."/");
        else
        {
            $hostparts = explode(".",$host);
            if(count($hostparts)>2)
            {
                $backhostparts = array_reverse($hostparts);
                $threeparts = array_slice($backhostparts,0,3);
                $twoparts = array_slice($threeparts,0,2);
                $hosts = array(implode('.',array_reverse($threeparts))."/",implode('.',array_reverse($twoparts))."/");
            }
            else
                $hosts = array($host."/");
        }
        //Now make key & key prefix
        $returnhosts = array();
        foreach($hosts as $value)
        {
            $fullhash = $this->sha256($value);
            $returnhosts[$fullhash] = array("Host"=>$value,"Prefix"=>substr($fullhash,0,8),"Hash"=>$fullhash);
        }
        return $returnhosts;
    }
    /*Hash up a list of values from makePrefixes() (will possibly be
    combined into that function at a later date).
    Returns an array keyed by full hash of array("Original","Prefix","Hash"), or (bool)
    false when the list is empty.*/
    function makeHashes($prefixarray)
    {
        if(count($prefixarray)>0)
        {
            $returnprefixes = array();
            foreach($prefixarray as $value)
            {
                $fullhash = $this->sha256($value);
                $returnprefixes[$fullhash] = array("Original"=>$value,"Prefix"=>substr($fullhash,0,8),"Hash"=>$fullhash);
            }
            return $returnprefixes;
        }
        else
            return false;
    }
    /*Make URL prefixes for use after a hostkey check.
    Combines every host candidate (the exact host plus, for non-IP hosts, the last 5..2
    components) with every path candidate (path+query, path, and up to four leading
    path prefixes) and returns the hashed result of makeHashes().*/
    function makePrefixes($host,$path,$query,$usingip)
    {
        //Exact hostname in the url
        $hostcombos = array($host);
        if(!$usingip)
        {
            $backhostparts = array_reverse(explode('.',$host));
            $maxslice = min(5,count($backhostparts));
            while($maxslice>1)
            {
                $hostcombos[] = implode('.',array_reverse(array_slice($backhostparts,0,$maxslice)));
                $maxslice--;
            }
        }
        $hostcombos = array_unique($hostcombos);
        //The path prefixes do not depend on the host, so build them once
        $pathprefixes = array();
        if(!empty($path))
        {
            $pathparts = explode("/",$path);
            $lastindex = count($pathparts)-1;
            $upperlimit = min(4,count($pathparts));
            $built = "";
            for($i=0;$i<$upperlimit;$i++)
            {
                $built .= $pathparts[$i];
                //Every component but the very last one of the path gets its trailing slash back
                if($i!=$lastindex)
                    $built .= "/";
                $pathprefixes[] = $built;
            }
        }
        //strlen() rather than empty() so that a query of "0" is still used
        $hasquery = strlen((string)$query)>0;
        $variations = array();
        foreach($hostcombos as $value)
        {
            if($hasquery)
                $variations[] = $value.$path.'?'.$query;
            $variations[] = $value.$path;
            foreach($pathprefixes as $pathprefix)
                $variations[] = $value.$pathprefix;
        }
        $variations = array_unique($variations);
        return $this->makeHashes($variations);
    }
    /*Parses the body of a full-hash GSB response into a flat list.
    The body is a sequence of "LISTNAME:ADDCHUNK:LENGTH\n" headers, each followed by LENGTH
    bytes of 32 byte hashes. Returns a list of array(listname, addchunk, hex hash) with one
    entry per hash; parsing stops at the first malformed header.*/
    private function parseFullHashResponse($data)
    {
        $entries = array();
        $remaining = (string)$data;
        while(strlen($remaining)>0)
        {
            $splithead = explode("\n",$remaining,2);
            $chunkinfo = explode(':',$splithead[0]);
            if(count($splithead)<2||count($chunkinfo)<3)
                break;
            $chunklen = (int)$chunkinfo[2];
            if($chunklen<0)
                break;
            $chunkdata = bin2hex((string)substr($splithead[1],0,$chunklen));
            if($chunkdata!=='')
            {
                //64 hex characters = one 32 byte SHA-256 hash
                foreach(str_split($chunkdata,64) as $hash)
                    $entries[] = array($chunkinfo[0],$chunkinfo[1],$hash);
            }
            $remaining = (string)substr($splithead[1],$chunklen);
        }
        return $entries;
    }
    /*Process data provided from the response of a full-hash GSB
    request. Returns array(listname => array(addchunk => hex hash)).
    NOTE: this shape can hold only one hash per list/chunk, so when a chunk carries several
    hashes the last one wins (kept for backwards compatibility). doFullLookup() uses
    parseFullHashResponse() directly so that it sees every hash.*/
    function processFullLookup($data)
    {
        $extracthash = array();
        foreach($this->parseFullHashResponse($data) as $entry)
            $extracthash[$entry[0]][$entry[1]] = $entry[2];
        return $extracthash;
    }
    /*Add a full-hash key to a prefix or hostkey (the variable is $prefix but it could
    be either). Looks for a matching hostkey row in the add-hosts table first and only
    falls back to the add-prefixes table if there is none. Calls fatalerror() if an
    UPDATE fails.*/
    function addFullHash($prefix,$chunknum,$fullhash,$listname)
    {
        //$listname comes from the remote response and ends up inside a quoted identifier
        $buildtrunk = str_replace('`','',$listname)."-a";
        $hoststable = $buildtrunk."-hosts";
        $prefixestable = $buildtrunk."-prefixes";
        $sqlprefix = mysqli_real_escape_string($this->dbLink,$prefix);
        $sqlchunknum = mysqli_real_escape_string($this->dbLink,$chunknum);
        $sqlfullhash = mysqli_real_escape_string($this->dbLink,$fullhash);
        //First check hosts
        $result = mysqli_query($this->dbLink, "SELECT * FROM `{$hoststable}` WHERE `Hostkey` = '$sqlprefix' AND `Chunknum` = '$sqlchunknum'");
        if($result&&mysqli_num_rows($result)>0)
        {
            while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
            {
                if(!empty($row['FullHash']))
                    continue;
                //We've got a live one! Insert the full hash for it
                $sqlid = mysqli_real_escape_string($this->dbLink,$row['ID']);
                $addresult = mysqli_query($this->dbLink, "UPDATE `{$hoststable}` SET `FullHash` = '$sqlfullhash' WHERE `ID` = '$sqlid';");
                if(!$addresult)
                    $this->fatalerror("Could not cache full-hash key. $prefix, $chunknum, $fullhash, $listname");
            }
            return;
        }
        //If there are no rows it must be a prefix
        $result = mysqli_query($this->dbLink, "SELECT * FROM `{$prefixestable}` WHERE `Prefix` = '$sqlprefix'");
        if(!$result)
            return;
        while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
        {
            if(!empty($row['FullHash']))
                continue;
            $sqlhostkey = mysqli_real_escape_string($this->dbLink,$row['Hostkey']);
            $sqlid = mysqli_real_escape_string($this->dbLink,$row['ID']);
            $resulttwo = mysqli_query($this->dbLink, "SELECT * FROM `{$hoststable}` WHERE `Hostkey` = '$sqlhostkey' AND `Chunknum` = '$sqlchunknum'");
            if(!$resulttwo)
                continue;
            while ($rowtwo = mysqli_fetch_array($resulttwo, MYSQLI_ASSOC))
            {
                //Only hostkeys that actually own prefixes (Count > 0) qualify
                if(hexdec($rowtwo['Count'])>0)
                {
                    $addresult = mysqli_query($this->dbLink, "UPDATE `{$prefixestable}` SET `FullHash` = '$sqlfullhash' WHERE `ID` = '$sqlid';");
                    if(!$addresult)
                        $this->fatalerror("Could not cache full-hash key. $prefix, $chunknum, $fullhash, $listname");
                }
            }
        }
    }
    /*Check database for any cached full-length hashes for a given prefix.
    Goes through every list in $this->usinglists and returns array(fullhash, chunknum) for
    the first cached entry found, or (bool) false if nothing is cached.*/
    function cacheCheck($prefix)
    {
        $sqlprefix = mysqli_real_escape_string($this->dbLink,$prefix);
        foreach($this->usinglists as $value)
        {
            $buildtrunk = $value."-a";
            $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '$sqlprefix' AND `FullHash` != ''");
            if($result&&mysqli_num_rows($result)>0)
            {
                //The first cached hostkey row is enough
                $row = mysqli_fetch_array($result, MYSQLI_ASSOC);
                if($row)
                    return array($row['FullHash'],$row['Chunknum']);
                continue;
            }
            $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-prefixes` WHERE `Prefix` = '$sqlprefix' AND `FullHash` != ''");
            if(!$result||mysqli_num_rows($result)==0)
                continue;
            while($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
            {
                $sqlhostkey = mysqli_real_escape_string($this->dbLink,$row['Hostkey']);
                $resulttwo = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '$sqlhostkey'");
                if(!$resulttwo)
                    continue;
                while ($rowtwo = mysqli_fetch_array($resulttwo, MYSQLI_ASSOC))
                {
                    if(hexdec($rowtwo['Count'])>0)
                        return array($row['FullHash'],$rowtwo['Chunknum']);
                }
            }
        }
        return false;
    }
    /*Do a full-hash lookup based on prefixes provided, returns (bool) true
    on a match and (bool) false on no match.
    $prefixes is a list of array(hex prefix, chunk number); $originals is the array produced
    by makeHashes()/makeHostKey() (keyed by full hash, each entry holding a 'Prefix').
    The local cache is consulted first; only if that gives no match is a gethash request sent.*/
    function doFullLookup($prefixes,$originals)
    {
        if(empty($prefixes))
            return false;
        //Store copy of original prefixes
        $cloneprefixes = $prefixes;
        //They should really all have the same prefix size.. we'll just check one
        $firstprefix = reset($prefixes);
        $prefixsize = strlen($firstprefix[0])/2;
        $length = count($prefixes)*$prefixsize;
        $packed = array();
        foreach($prefixes as $value)
        {
            //Check cache on each iteration (we can return true earlier if we get a match!)
            $cachechk = $this->cacheCheck($value[0]);
            if($cachechk&&isset($originals[$cachechk[0]]))
            {
                //Check from same chunk
                foreach($cloneprefixes as $nnewvalue)
                {
                    //Prefixes are compared as strings: loose == would treat e.g. "00001e05" as a number
                    if($nnewvalue[1]==$cachechk[1]&&(string)$value[0]===(string)$originals[$cachechk[0]]['Prefix'])
                        return true;
                }
            }
            $packed[] = pack("H*",$value[0]);
        }
        //No cache matches so we continue with request
        $body = "$prefixsize:$length\n".implode("",$packed);

        $buildopts = array(CURLOPT_POST=>true,CURLOPT_POSTFIELDS=>$body);
        $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/gethash?client=api&apikey=".$this->apikey."&appver=".$this->version."&pver=".$this->apiversion,$buildopts,"lookup");

        if($result[0]['http_code']==200&&!empty($result[1]))
        {
            //Go over every full hash in the response (a chunk may carry more than one)
            foreach($this->parseFullHashResponse($result[1]) as $entry)
            {
                list($listname,$addchunk,$fullhash) = $entry;
                if(!isset($originals[$fullhash]))
                    continue;
                //Okay it matches a full-hash we have, now to check they're from the same chunks
                foreach($cloneprefixes as $nnewvalue)
                {
                    if($nnewvalue[1]==$addchunk&&(string)$nnewvalue[0]===(string)$originals[$fullhash]['Prefix'])
                    {
                        //From same chunks
                        //Add full hash to database (cache)
                        $this->addFullHash($nnewvalue[0],$nnewvalue[1],$fullhash,$listname);
                        return true;
                    }
                }
            }
            return false;
        }
        elseif($result[0]['http_code']==204&&strlen($result[1])==0)
        {
            //204 Means no match
            return false;
        }
        //"No No No! This just doesn't add up at all!"
        $this->fatalerror("ERROR: Invalid response returned from GSB ({$result[0]['http_code']})");
        return false;
    }
    /*Checks to see if a match for a prefix is found in the sub table, if it is then we won't do
    a full-hash lookup. Return true on match in sub list, return false on negative.
    $prefixlist is a list of array(prefix or hostkey, add chunk number); $mode is "prefix" or
    "hostkey" and selects which column is matched. Any other mode calls fatalerror().*/
    function subCheck($listname,$prefixlist,$mode)
    {
        $buildtrunk = $listname.'-s';
        if($mode!="prefix"&&$mode!="hostkey")
        {
            $this->fatalerror("Invalid SubCheck Mode $mode");
            return false;
        }
        //In prefix mode the add part was a prefix, not a hostkey, so we just check prefixes (saves a lookup)
        $column = ($mode=="prefix") ? 'Prefix' : 'Hostkey';
        foreach($prefixlist as $value)
        {
            $sqlvalue = mysqli_real_escape_string($this->dbLink,$value[0]);
            $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-prefixes` WHERE `$column` = '$sqlvalue'");
            if(!$result||mysqli_num_rows($result)==0)
                continue;
            //As interpreted from Developer Guide if theres a match in sub list it cancels out the add listing
            //we'll double check its from the same chunk just to be pedantic
            while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
            {
                if(hexdec($row['AddChunkNum'])!=$value[1])
                    continue;
                //A hostkey-only sub entry is one without a prefix
                if($mode=="prefix"||empty($row['Prefix']))
                    return true;
            }
        }
        return false;
    }
    /*Does a full URL lookup on given lists, will check if its in database,
if slight match there then
    will do a full-hash lookup on GSB, returns (bool) true on match and (bool) false on negative.
    For every list in $this->usinglists and every hostkey of the canonicalized URL:
    a hostkey row with Count > 0 is followed up through its prefixes, a row with Count 0
    stands for the whole host. A candidate is dropped when subCheck() finds it in the sub
    table, otherwise it is confirmed with doFullLookup().*/
    function doLookup($url)
    {
        $lists = $this->usinglists;
        //First canonicalize the URL
        $canurl = $this->Canonicalize($url);
        //Make hostkeys
        $hostkeys = $this->makeHostKey($canurl['Parts']['Host'],$canurl['Parts']['IP']);
        //Built lazily, the first time a hostkey with prefixes is found
        $prefixes = null;
        $buildprequery = '';
        foreach($lists as $value)
        {
            $buildtrunk = $value.'-a';
            //Loop over each list
            foreach($hostkeys as $valueinner)
            {
                //Within each list loop over each hostkey
                $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '{$valueinner['Prefix']}'");
                if(!$result||mysqli_num_rows($result)==0)
                    continue;
                //For each hostkey match
                while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
                {
                    if(hexdec($row['Count'])>0)
                    {
                        //There was a match and the count is more than zero so there are prefixes!
                        //Hash up a load of prefixes and create the build query if we haven't done so already
                        if($prefixes===null)
                        {
                            $prefixes = $this->makePrefixes($canurl['Parts']['Host'],$canurl['Parts']['Path'],$canurl['Parts']['Query'],$canurl['Parts']['IP']);
                            $conditions = array();
                            foreach($prefixes as $prefix)
                            {
                                $conditions[] = " `Prefix` = '{$prefix['Prefix']}' ";
                            }
                            $buildprequery = implode("OR",$conditions);
                        }
                        //Check if there are any matching prefixes
                        $sqlhostkey = mysqli_real_escape_string($this->dbLink,$row['Hostkey']);
                        $resulttwo = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-prefixes` WHERE ($buildprequery) AND `Hostkey` = '$sqlhostkey'");
                        //If we didn't find matches then do nothing (keep looping till end and it'll return negative)
                        if(!$resulttwo||mysqli_num_rows($resulttwo)==0)
                            continue;
                        //We found prefix matches
                        $prematches = array();
                        while ($rowtwo = mysqli_fetch_array($resulttwo, MYSQLI_ASSOC))
                        {
                            $prematches[] = array($rowtwo['Prefix'],$row['Chunknum']);
                        }
                        //Before we send off any requests first check whether its in sub table,
                        //then send off any matching prefixes to do some full-hash key checks
                        if(!$this->subCheck($value,$prematches,"prefix")&&$this->doFullLookup($prematches,$prefixes))
                            return true;
                    }
                    else
                    {
                        //There was a match but the count was 0 so that entire domain could be a match, send off to check
                        $hostmatch = array(array($row['Hostkey'],$row['Chunknum']));
                        if(!$this->subCheck($value,$hostmatch,"hostkey")&&$this->doFullLookup($hostmatch,$hostkeys))
                            return true;
                    }
                }
            }
        }
        return false;
    }
}
--------------------------------------------------------------------------------