├── .gitignore
├── LICENSE
├── Readme.md
├── composer.json
├── install.php
├── listupdater.php
├── lookup.php
└── phpgsb.class.php
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2010-2015, Sam Cleaver
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 | * Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | * Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 | * Neither the name of the organization nor the
12 | names of its contributors may be used to endorse or promote products
13 | derived from this software without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 | # Implementation of Google Safe Browsing
2 |
3 | **phpGSB** is a robust implementation of the Google Safe Browsing API. It currently *does* allow the following:
4 |
5 | * Updating of GSB lists to a MySQL database
6 | * Basic checking of URLs against lists and then full-hash checks against the full GSB database
7 | * Caching of full-hash keys to minimise requests to the remote Google server
8 |
9 | Currently it *does not* allow the following:
10 |
11 | * Requests using MAC keys (integrity checks)
12 |
13 | The main class is definitely not as efficient as it could be and was developed very quickly to meet the basic GSB specification; any contributions, bug fixes, etc. are **very** welcome!
14 |
15 | ## Download
16 |
17 | * Installation using composer
18 | ```
19 | $ composer require samcleaver/phpgsb
20 | ```
21 |
22 | * You could manually download phpgsb by this link: https://github.com/samcleaver/phpGSB/archive/0.2.6.zip
23 |
24 | ## Installation
25 |
26 | 1. Enter database details into install.php (Replace DATABASE_USERNAME, DATABASE_NAME and DATABASE_PASSWORD with respective information)
27 | 2. Run install.php
28 | 3. Look at listupdater.php and lookup.php example files for basic methods on using the system.
29 | 4. If you choose to use listupdater.php as-is then set it as a cron job/scheduled task to run every minute. *(It won't actually update every minute, but this is required in case of backoff procedures and timeouts.)*
30 |
31 | ## FAQ
32 |
33 | * **When I do a lookup, phpGSB says the URL is safe but I know it's not.**
34 | *The database is updated in chunks from Google's central server. Because of this, you need to run updates for 24 hours before you can start doing lookups; this is a limitation of the specification and not the implementation. (Check Step 4 of installation on how to ensure updates are running.)*
35 |
36 | ## License
37 |
38 | The phpGSB library is released under the New BSD License.
39 |
40 | ```
41 | Copyright (c) 2010-2015, Sam Cleaver
42 |
43 | Redistribution and use in source and binary forms, with or without
44 | modification, are permitted provided that the following conditions are met:
45 | * Redistributions of source code must retain the above copyright
46 | notice, this list of conditions and the following disclaimer.
47 | * Redistributions in binary form must reproduce the above copyright
48 | notice, this list of conditions and the following disclaimer in the
49 | documentation and/or other materials provided with the distribution.
50 | * Neither the name of the organization nor the
51 | names of its contributors may be used to endorse or promote products
52 | derived from this software without specific prior written permission.
53 |
54 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
55 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
58 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
60 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
61 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
62 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 | ```
65 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name":"samcleaver/phpgsb",
3 | "description":"PHP Google Safe Browsing",
4 | "keywords":["php","google","gsf", "google safe browsing"],
5 | "type":"library",
6 | "license":"BSD-3-Clause",
7 | "version": "0.2.7",
8 | "authors":[
9 | {
10 | "name":"Sam Cleaver",
11 | "homepage":"https://github.com/samcleaver"
12 | }
13 | ],
14 | "homepage": "https://github.com/samcleaver/phpgsb",
15 | "require":{
16 | "php":">=5.3.0"
17 | },
18 | "autoload":{
19 | "classmap": [
20 | "phpgsb.class.php"
21 | ]
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/install.php:
--------------------------------------------------------------------------------
1 | usinglists = array('googpub-phish-shavar','goog-malware-shavar', 'goog-unwanted-shavar');
13 | //Install MySQL tables
14 | foreach($phpgsb->usinglists as $value)
15 | {
16 | //Create ADD tables
17 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-a-hosts` (
18 | `ID` int(255) NOT NULL auto_increment,
19 | `Hostkey` varchar(8) NOT NULL,
20 | `Chunknum` int(255) NOT NULL,
21 | `Count` varchar(2) NOT NULL default '0',
22 | `FullHash` varchar(70) NOT NULL,
23 | PRIMARY KEY (`ID`),
24 | KEY `Hostkey` (`Hostkey`)
25 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;");
26 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-a-index` (
27 | `ChunkNum` int(255) NOT NULL auto_increment,
28 | `Chunklen` int(255) NOT NULL default '0',
29 | PRIMARY KEY (`ChunkNum`)
30 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;");
31 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-a-prefixes` (
32 | `ID` int(255) NOT NULL auto_increment,
33 | `Hostkey` varchar(8) NOT NULL,
34 | `Prefix` varchar(255) NOT NULL,
35 | `FullHash` varchar(70) NOT NULL,
36 | PRIMARY KEY (`ID`),
37 | KEY `Hostkey` (`Hostkey`)
38 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;");
39 | //Create SUB tables
40 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-s-hosts` (
41 | `ID` int(255) NOT NULL auto_increment,
42 | `Hostkey` varchar(8) NOT NULL,
43 | `Chunknum` int(255) NOT NULL,
44 | `Count` varchar(2) NOT NULL default '0',
45 | `FullHash` varchar(70) NOT NULL,
46 | PRIMARY KEY (`ID`),
47 | KEY `Hostkey` (`Hostkey`)
48 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;");
49 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-s-index` (
50 | `ChunkNum` int(255) NOT NULL auto_increment,
51 | `Chunklen` int(255) NOT NULL default '0',
52 | PRIMARY KEY (`ChunkNum`)
53 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;");
54 | mysqli_query($phpgsb->getDbLink(), "CREATE TABLE IF NOT EXISTS `$value-s-prefixes` (
55 | `ID` int(255) NOT NULL auto_increment,
56 | `Hostkey` varchar(8) NOT NULL,
57 | `AddChunkNum` varchar(8) NOT NULL,
58 | `Prefix` varchar(255) NOT NULL,
59 | `FullHash` varchar(70) NOT NULL,
60 | PRIMARY KEY (`ID`),
61 | KEY `Hostkey` (`Hostkey`)
62 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;");
63 | }
64 | //Check timeout files writable
65 | if(file_put_contents("testfile.dat","TEST PRE-USE PHPGSB ".time()))
66 | unlink("testfile.dat");
67 | else
68 | echo "ERROR: THIS DIRECTORY IS NOT WRITABLE, CHMOD to 775 or 777";
69 | ?>
--------------------------------------------------------------------------------
/listupdater.php:
--------------------------------------------------------------------------------
1 | apikey = "API_KEY_HERE";
14 | $phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar', 'goog-unwanted-shavar');
15 | $phpgsb->runUpdate();
16 | $phpgsb->close();
17 | ?>
--------------------------------------------------------------------------------
/lookup.php:
--------------------------------------------------------------------------------
1 | apikey = "API_KEY_HERE";
14 | $phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar', 'goog-unwanted-shavar');
15 | //Should return false (not phishing or malware)
16 | var_dump($phpgsb->doLookup('http://www.google.com'));
17 | //Should return true, malicious URL
18 | var_dump($phpgsb->doLookup('http://www.gumblar.cn'));
19 | $phpgsb->close();
20 | ?>
--------------------------------------------------------------------------------
/phpgsb.class.php:
--------------------------------------------------------------------------------
1 | phpGSB($database, $username, $password, $host, $port, $verbose);
31 | }
/**
 * Legacy-style initialiser invoked by the constructor.
 *
 * Disables output when $verbose is false, logs a load message and, when both
 * a database name and a username are given, opens the database connection.
 *
 * @param string|false $database Database name, or false to skip connecting.
 * @param string|false $username Database user, or false to skip connecting.
 * @param string|false $password Database password.
 * @param string       $host     Database host.
 * @param int          $port     Database port.
 * @param bool         $verbose  Whether status messages are printed.
 */
function phpGSB($database=false,$username=false,$password=false,$host="localhost",$port=3306,$verbose=true)
{
    if(!$verbose)
    {
        $this->silent();
    }
    $this->outputmsg("phpGSB Loaded");

    // A connection is only attempted when both pieces of information exist.
    $canConnect = ($database && $username);
    if(!$canConnect)
    {
        return;
    }
    $this->dbConnect($database,$username,$password,$host,$port);
}
/**
 * Closes the database connection (if one was opened) and reports peak memory.
 *
 * The constructor allows the object to be created without database details,
 * so the link may never have been set; closing is skipped in that case
 * instead of passing an empty value to mysqli_close().
 */
function close()
{
    if(!empty($this->dbLink))
    {
        mysqli_close($this->dbLink);
        // Forget the closed handle so a second close() is a harmless no-op.
        $this->dbLink = null;
    }
    $this->outputmsg("Closing phpGSB. (Peak Memory: ".(round(memory_get_peak_usage()/1048576,3))."MB)");
}
/**
 * Turns verbose output off; outputmsg() and fatalerror() print nothing afterwards.
 */
function silent() {
    $this->verbose = false;
}
/**
 * Disables transaction handling: trans_begin(), trans_commit() and
 * trans_rollback() become no-ops while this flag is false.
 */
function trans_disable() {
    $this->transenabled = false;
}
/**
 * Enables transaction handling for trans_begin(), trans_commit() and
 * trans_rollback().
 */
function trans_enable() {
    $this->transenabled = true;
}
/**
 * Starts a MySQL transaction when transactions are enabled and records that
 * one is in progress.
 */
function trans_begin()
{
    if(!$this->transenabled)
    {
        return;
    }
    $this->transtarted = true;
    $this->outputmsg("Begin MySQL Transaction");
    mysqli_query($this->dbLink, "BEGIN");
}
/**
 * Commits the running transaction.
 *
 * Nothing happens unless a transaction was started, the connection still
 * answers a ping and transactions are enabled (checked in that order).
 */
function trans_commit()
{
    if(!$this->transtarted)
    {
        return;
    }
    if(!mysqli_ping($this->dbLink))
    {
        return;
    }
    if(!$this->transenabled)
    {
        return;
    }
    $this->transtarted = false;
    $this->outputmsg("Comitting Transaction");
    mysqli_query($this->dbLink, "COMMIT");
}
/**
 * Rolls back the running transaction.
 *
 * Nothing happens unless a transaction was started, the connection still
 * answers a ping and transactions are enabled (checked in that order).
 */
function trans_rollback()
{
    if(!$this->transtarted)
    {
        return;
    }
    if(!mysqli_ping($this->dbLink))
    {
        return;
    }
    if(!$this->transenabled)
    {
        return;
    }
    $this->transtarted = false;
    $this->outputmsg("Rolling Back Transaction");
    mysqli_query($this->dbLink, "ROLLBACK");
}
/**
 * Prints a status message (used instead of echo so output can be silenced).
 *
 * When verbose mode is on, the message is echoed through an output buffer,
 * the buffered text is appended to $this->ob, and the buffer is flushed.
 * In silent mode the call does nothing.
 *
 * @param string $msg Message text.
 */
function outputmsg($msg)
{
    if(!$this->verbose)
    {
        return;
    }
    ob_start();
    echo $msg.'...
';
    $captured = ob_get_contents();
    $this->ob .= $captured;
    ob_end_flush();
}
/**
 * Reports a fatal error, rolls back any open transaction and stops the script.
 *
 * When verbose mode is on, the message is dumped with print_r() through an
 * output buffer, the buffered text is appended to $this->ob, and the buffer
 * is flushed. The rollback and termination happen regardless of verbosity.
 *
 * @param mixed $msg Error text or structure (printed via print_r).
 */
function fatalerror($msg)
{
    $shouldPrint = $this->verbose;
    if($shouldPrint)
    {
        ob_start();
        print_r($msg);
        echo '...
';
        $captured = ob_get_contents();
        $this->ob .= $captured;
        ob_end_flush();
    }
    $this->trans_rollback();
    die();
}
/**
 * Connects to the MySQL server and selects the working database.
 *
 * Any failure is reported through fatalerror(), which terminates the script.
 *
 * @param string $database Database name to select.
 * @param string $username Database user.
 * @param string $password Database password.
 * @param string $host     Database host.
 * @param int    $port     Database port.
 */
function dbConnect($database,$username,$password,$host="localhost",$port=3306)
{
    $this->dbLink = mysqli_connect($host, $username, $password, '', $port);
    if (!$this->dbLink) {
        // No link exists after a failed connect, so mysqli_error() cannot be
        // used here; mysqli_connect_error() carries the reason instead.
        $this->fatalerror('Could not connect: ' . mysqli_connect_error());
    }
    $this->outputmsg('Connected successfully to database server');
    $db_selected = mysqli_select_db($this->dbLink, $database);
    if (!$db_selected) {
        // Double quotes so the database name is actually interpolated.
        $this->fatalerror("Can't use $database : " . mysqli_error($this->dbLink));
    }
    $this->outputmsg('Connected to database successfully');
}
/**
 * Returns the database link stored by dbConnect().
 *
 * @return mixed The value of $this->dbLink.
 */
public function getDbLink() {
    return $this->dbLink;
}
/**
 * Calculates the back-off delay, in SECONDS, for a number of consecutive errors.
 *
 * Schedule (per the Safe Browsing developer guide):
 *   1st error        -> 1 minute
 *   2nd..5th error   -> (2^errors * 7.5) minutes, multiplied by a random
 *                       factor between 1 and 2 (30-60, 60-120, 120-240,
 *                       240-480 minutes)
 *   6th error onward -> 480 minutes (8 hours)
 *
 * The result is added to time() by the caller, so every branch must return
 * seconds. The formula branch used to return minutes, which made the second
 * to fifth retries fire after seconds instead of minutes.
 *
 * @param int $errors Number of consecutive failed requests.
 * @return int|float Delay in seconds.
 */
function calc($errors)
{
    if($errors==1)
    {
        //1st error, wait a minute
        return 60;
    }
    if($errors>5)
    {
        //Above 5 errors check every 8 hours (480 minutes)
        return 28800;
    }
    //Double the base timeout for each error and apply a random factor in [1,2]
    $minutes = (pow(2,$errors) * 7.5) * ((rand(0,1000)/1000) + 1);
    //Convert to seconds to match the other branches
    return floor($minutes * 60);
}
/**
 * Records a failed request and aborts.
 *
 * Increments the error counter stored in the next-check file, uses calc() to
 * work out how long to wait, writes "<next allowed timestamp>||<error count>"
 * back to the file and then terminates through fatalerror().
 *
 * The default on $errdata was removed: it sat before a required parameter,
 * so it could never be used, and PHP 8 reports such signatures as deprecated.
 *
 * @param mixed  $errdata Diagnostic data included in the error output.
 * @param string $type    "data" selects nextcheck.dat; anything else selects nextcheckl.dat.
 */
function Backoff($errdata,$type)
{
    if($type=="data")
        $file = 'nextcheck.dat';
    else
        $file = 'nextcheckl.dat';
    $path = $this->pingfilepath.$file;

    // The file may not exist yet, or may have an empty error field (as
    // written by setTimeout); both mean "no previous errors".
    $errors = 0;
    if(file_exists($path))
    {
        $curstatus = explode('||',(string)file_get_contents($path));
        if(isset($curstatus[1]))
            $errors = (int)$curstatus[1];
    }
    $errors++;

    $seconds = $this->calc($errors);
    $until = (time()+$seconds).'||'.$errors;
    file_put_contents($path,$until);
    $this->fatalerror(array("Invalid Response... Backing Off",$errdata));
}
/**
 * Writes the next allowed request time (now + $seconds) to nextcheck.dat.
 *
 * The error counter already stored in the file (the part after "||") is
 * carried over unchanged; when the file is missing, unreadable or has no
 * counter field, an empty counter is written.
 *
 * @param int|string $seconds Delay in seconds before the next request.
 */
function setTimeout($seconds)
{
    $path = $this->pingfilepath.'nextcheck.dat';
    $errors = '';
    if (file_exists($path)) {
        $contents = file_get_contents($path);
        if ($contents !== false) {
            $curstatus = explode('||',$contents);
            // Guard against a file that lacks the "||" separator.
            if (isset($curstatus[1])) {
                $errors = $curstatus[1];
            }
        }
    }
    // Cast so a missing value or stray whitespace cannot break the addition.
    $until = (time()+(int)$seconds).'||'.$errors;
    file_put_contents($path,$until);
}
/**
 * Checks the next-check file and aborts if a request is not yet allowed
 * (usually performed at the start of a script).
 *
 * A missing file means no timeout has been recorded yet (for example on the
 * very first run), so the request is allowed.
 *
 * @param string $type "data" selects nextcheck.dat; anything else selects nextcheckl.dat.
 */
function checkTimeout($type)
{
    if($type=="data")
        $file = 'nextcheck.dat';
    else
        $file = 'nextcheckl.dat';
    $path = $this->pingfilepath.$file;

    $nextallowed = 0;
    if(file_exists($path))
    {
        $curstatus = explode('||',(string)file_get_contents($path));
        $nextallowed = (int)$curstatus[0];
    }

    $remaining = $nextallowed - time();
    if($remaining>0)
    {
        $this->fatalerror("Must wait another ".$remaining. " seconds before another request");
    }
    else
        $this->outputmsg("Allowed to request");
}
/**
 * Downloads a URL with cURL.
 *
 * @param string      $url           URL to request.
 * @param array|mixed $options       Extra cURL options (e.g. POST data); ignored unless an array.
 * @param mixed       $followbackoff False to ignore failures; otherwise the back-off type
 *                                   passed on to Backoff() when the request fails.
 * @return array array($info, $data): curl_getinfo() details and the response body
 *               (false when the transfer itself failed).
 */
function googleDownloader($url,$options,$followbackoff=false)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    if(is_array($options))
        curl_setopt_array($ch, $options);

    $data = curl_exec($ch);
    $info = curl_getinfo($ch);
    if($data===false)
    {
        // Keep the transport error so it shows up in the back-off report.
        $info['curl_error'] = curl_error($ch);
    }
    curl_close($ch);

    // A failed transfer reports http_code 0, which the status check alone
    // would accept as success; treat it as a failure as well.
    $failed = ($data===false) || $info['http_code']>299;
    if($followbackoff&&$failed)
    {
        $this->Backoff($info,$followbackoff);
    }
    return array($info,$data);
}
219 | //UPDATER FUNCTIONS
/**
 * Empties every list table; only called when GSB asks for a database reset.
 *
 * If an admin e-mail address is configured, a notification is mailed first.
 * For each configured list the sub tables are truncated before the add
 * tables, each in the order index, hosts, prefixes.
 */
function resetDatabase()
{
    if(!empty($this->adminemail))
    {
        mail($this->adminemail,'Reset Database Request Issued','For some crazy unknown reason GSB requested a database reset at '.time());
    }

    $modes = array('s','a');
    $tables = array('index','hosts','prefixes');
    foreach($this->usinglists as $listname)
    {
        foreach($modes as $mode)
        {
            foreach($tables as $table)
            {
                mysqli_query($this->dbLink, "TRUNCATE TABLE `$listname-$mode-$table`");
            }
        }
    }
}
/**
 * Parses the body of a GSB chunk download and saves every chunk it contains.
 *
 * The body is a sequence of records, each made of a text header line
 * "TYPE:CHUNKNUM:HASHLEN:CHUNKLEN\n" followed by CHUNKLEN bytes of binary
 * data. TYPE "a" is an add chunk, "s" a sub chunk; other types are logged
 * and skipped. Each parsed chunk is handed to saveChunkPart().
 *
 * The raw body is deliberately NOT trimmed: the payload is binary, and
 * trimming could strip hash bytes that happen to equal whitespace or NUL
 * from the end of the last chunk. Only the header line is trimmed.
 *
 * @param string $fulldata Raw response body.
 * @param string $listname Name of the list the chunks belong to.
 * @return bool Always true.
 */
function processChunks($fulldata,$listname)
{
    $remaining = (string)$fulldata;
    while(strlen($remaining)>0)
    {
        $splithead = explode("\n",$remaining,2);
        $header = trim($splithead[0]);
        $payload = isset($splithead[1]) ? $splithead[1] : '';

        // Require three numeric fields after the type label; blank lines
        // (e.g. a trailing newline) are skipped silently.
        if(!preg_match('/^([^:]*):(\d+):(\d+):(\d+)$/',$header,$chunkinfo))
        {
            if($header!=='')
                $this->outputmsg("DISCARDED CHUNK HEADER (Malformed)");
            $remaining = $payload;
            continue;
        }
        $type = $chunkinfo[1];
        $chunknum = (int)$chunkinfo[2];
        $hashlen = (int)$chunkinfo[3];
        $chunklen = (int)$chunkinfo[4];

        if($type=='a'||$type=='s')
        {
            $chunk = array(
                'CHUNKNUM' => $chunknum,
                'HASHLEN' => $hashlen,
                'CHUNKLEN' => $chunklen,
            );
            if($chunklen>0)
            {
                //Convert to hex for easy processing
                $chunkdata = bin2hex((string)substr($payload,0,$chunklen));
                $chunk['REAL'] = $this->decodeChunkHosts($chunkdata,$hashlen,$type=='s');
            }
            //Chunks without data are still saved so their number is recorded
            $this->saveChunkPart($chunk,($type=='a') ? "ADD" : "SUB",$listname);
        }
        elseif($chunklen>0)
        {
            $this->outputmsg("DISCARDED CHUNKNUM: $chunknum (Had no valid label)");
        }
        else
        {
            $this->outputmsg("DISCARDED CHUNKNUM: $chunknum (Empty)");
        }

        //Continue with whatever follows this chunk's data
        $remaining = (string)substr($payload,$chunklen);
    }
    return true;
}
/**
 * Decodes the hex-encoded data of one chunk into a list of host entries.
 *
 * Each entry starts with an 8-hex-digit host key and a 2-hex-digit count.
 * Add chunks: count prefixes of $hashlen bytes follow.
 * Sub chunks: count pairs of (8-hex-digit add chunk number, prefix) follow;
 * with a count of zero a single add chunk number follows without a prefix.
 *
 * @param string $chunkdata Hex string of the chunk data.
 * @param int    $hashlen   Prefix length in bytes.
 * @param bool   $issub     True for sub chunks, false for add chunks.
 * @return array List of arrays with HOSTKEY, COUNT and optional PAIRS.
 */
private function decodeChunkHosts($chunkdata,$hashlen,$issub)
{
    $hosts = array();
    $prefixlen = $hashlen*2;
    while(strlen($chunkdata)>0)
    {
        $host = array(
            'HOSTKEY' => substr($chunkdata,0,8),
            'COUNT' => substr($chunkdata,8,2),
        );
        $chunkdata = (string)substr($chunkdata,10);
        $realcount = hexdec((string)$host['COUNT']);

        //A sub entry with count 0 still carries one add chunk number
        $pairs = ($issub&&$realcount==0) ? 1 : $realcount;
        for($i = 0; $i < $pairs; $i++)
        {
            if($issub)
            {
                $host['PAIRS'][$i]['ADDCHUNKNUM'] = substr($chunkdata,0,8);
                $chunkdata = (string)substr($chunkdata,8);
            }
            if($realcount>0)
            {
                $host['PAIRS'][$i]['PREFIX'] = substr($chunkdata,0,$prefixlen);
                $chunkdata = (string)substr($chunkdata,$prefixlen);
            }
        }
        $hosts[] = $host;
    }
    return $hosts;
}
/**
 * Saves one processed chunk to the MySQL tables of a list.
 *
 * A chunk already marked in $this->mainlist for this list and type is
 * skipped. Otherwise it is marked, its number and length are inserted into
 * the index table and, if that succeeds, its host keys and prefixes are
 * inserted into the hosts and prefixes tables.
 *
 * Chunk values originate from a remote response, so numbers are cast to
 * integers, strings are escaped, and the list name (used inside table names)
 * is restricted to letters, digits, "_" and "-".
 *
 * @param array  $data     Chunk with CHUNKNUM, CHUNKLEN and, when CHUNKLEN > 0, REAL.
 * @param string $type     "ADD" or "SUB"; any other value saves nothing.
 * @param string $listname List the chunk belongs to.
 */
function saveChunkPart($data,$type,$listname)
{
    $listname = trim($listname);
    if($type=="ADD")
        $listtype = 'a';
    elseif($type=="SUB")
        $listtype = 's';
    else
        return;

    if(!preg_match('/^[A-Za-z0-9_-]+$/',$listname))
    {
        $this->outputmsg("COULD NOT SAVE $type CHUNK (Invalid list name)");
        return;
    }

    $chunknum = (int)$data['CHUNKNUM'];
    $chunklen = (int)$data['CHUNKLEN'];

    //Skip chunks that were already received (prevents dupes)
    if(isset($this->mainlist[$listtype][$listname][$chunknum]))
        return;
    $this->mainlist[$listtype][$listname][$chunknum] = true;

    //Build the value lists for the hosts and prefixes tables
    $buildhost = array();
    $buildpairs = array();
    if($chunklen>0&&isset($data['REAL'])&&is_array($data['REAL']))
    {
        foreach($data['REAL'] as $host)
        {
            $hostkey = mysqli_real_escape_string($this->dbLink,(string)$host['HOSTKEY']);
            $count = mysqli_real_escape_string($this->dbLink,(string)$host['COUNT']);
            $buildhost[] = "('$hostkey','$chunknum','$count','')";
            if(empty($host['PAIRS']))
                continue;
            foreach($host['PAIRS'] as $pair)
            {
                $prefix = isset($pair['PREFIX']) ? mysqli_real_escape_string($this->dbLink,(string)$pair['PREFIX']) : '';
                if($listtype=='s')
                {
                    $addchunk = isset($pair['ADDCHUNKNUM']) ? mysqli_real_escape_string($this->dbLink,(string)$pair['ADDCHUNKNUM']) : '';
                    $buildpairs[] = "('$hostkey','$addchunk','$prefix','')";
                }
                else
                    $buildpairs[] = "('$hostkey','$prefix','')";
            }
        }
    }

    //Insert index value; the rest is only saved when this succeeds
    $indexins = mysqli_query($this->dbLink, "INSERT INTO `$listname-$listtype-index` (`ChunkNum`,`Chunklen`) VALUES ('$chunknum','$chunklen');");
    if(!$indexins)
    {
        $this->outputmsg("COULD NOT SAVE $listname $type INDEXS ".mysqli_error($this->dbLink));
        return;
    }

    if(count($buildhost)>0)
    {
        //Insert hostkeys index
        $hostinsert = implode(', ',$buildhost);
        if(!mysqli_query($this->dbLink, "INSERT INTO `$listname-$listtype-hosts` (`Hostkey`,`Chunknum`,`Count`,`FullHash`) VALUES $hostinsert;"))
            $this->outputmsg("COULD NOT SAVE $listname $type HOST KEYS ".mysqli_error($this->dbLink));
    }
    if(count($buildpairs)>0)
    {
        //Insert prefixes (sub tables carry the extra AddChunkNum column)
        $pairinsert = implode(', ',$buildpairs);
        if($listtype=='a')
            $columns = "(`Hostkey`,`Prefix`,`FullHash`)";
        else
            $columns = "(`Hostkey`,`AddChunkNum`,`Prefix`,`FullHash`)";
        if(!mysqli_query($this->dbLink, "INSERT INTO `$listname-$listtype-prefixes` $columns VALUES $pairinsert;"))
            $this->outputmsg("COULD NOT SAVE $listname $type PREFIX HOST KEYS ".mysqli_error($this->dbLink));
    }
}
/**
 * Returns the chunk numbers stored for a list as compact ranges.
 *
 * Reads the index table of the list and mode in ascending order, marks every
 * chunk as received in $this->mainlist (prevents dupes) and collapses
 * consecutive numbers, e.g. 1,2,3,7 becomes array("1-3","7").
 *
 * A failed query (for example a missing table) is reported and yields an
 * empty list instead of passing the failed result to mysqli_fetch_array().
 *
 * @param string $listname List name.
 * @param string $mode     "a" for add chunks, "s" for sub chunks.
 * @return array Range strings such as "5" or "10-20".
 */
function getRanges($listname,$mode)
{
    $checktable = $listname.'-'.$mode.'-index';
    $ranges = array();
    $results = mysqli_query($this->dbLink, "SELECT ChunkNum FROM `$checktable` ORDER BY `ChunkNum` ASC");
    if(!$results)
    {
        $this->outputmsg("COULD NOT READ $checktable ".mysqli_error($this->dbLink));
        return $ranges;
    }

    $start = null;
    $previous = null;
    while ($row = mysqli_fetch_array($results, MYSQLI_ASSOC))
    {
        $chunknum = (int)$row['ChunkNum'];
        $this->mainlist[$mode][$listname][$chunknum] = true;
        if($start===null)
        {
            $start = $chunknum;
        }
        elseif($chunknum!=$previous+1)
        {
            //Gap found: close the current range and start a new one
            $ranges[] = ($start==$previous) ? (string)$start : $start.'-'.$previous;
            $start = $chunknum;
        }
        $previous = $chunknum;
    }
    mysqli_free_result($results);

    //Close the range that was still open when the rows ran out
    if($start!==null)
        $ranges[] = ($start==$previous) ? (string)$start : $start.'-'.$previous;
    return $ranges;
}
/**
 * Collects the stored sub and add chunk ranges of a list.
 *
 * @param string $listname List name.
 * @return array array("Subranges"=>..., "Addranges"=>...), each as returned by getRanges().
 */
function getFullRanges($listname)
{
    $full = array();
    $full["Subranges"] = $this->getRanges($listname,'s');
    $full["Addranges"] = $this->getRanges($listname,'a');
    return $full;
}
/**
 * Builds the request line for one list: the list name, a ";", then the
 * stored sub ranges ("s:...") and add ranges ("a:...") joined by ":",
 * followed by a newline. Empty range sets are left out.
 *
 * @param string $listname List name.
 * @return string Request line for the list.
 */
function formattedRequest($listname)
{
    $fullranges = $this->getFullRanges($listname);
    $sections = array();
    if(count($fullranges['Subranges'])>0)
    {
        $sections[] = 's:'.implode(',',$fullranges['Subranges']);
    }
    if(count($fullranges['Addranges'])>0)
    {
        $sections[] = 'a:'.implode(',',$fullranges['Addranges']);
    }
    return $listname.';'.implode(':',$sections)."\n";
}
/**
 * Deletes a chunk number or chunk range from a list (called when GSB
 * returns a SUB-DEL or ADD-DEL response).
 *
 * Removes the matching rows from the index table, then looks up the host
 * keys of the matching chunks, deletes all prefixes for those host keys and
 * finally deletes the host rows themselves.
 *
 * The range text comes from the remote response, so its bounds are cast to
 * integers before being placed in SQL.
 *
 * @param string $range    Single chunk number ("12") or range ("12-40").
 * @param string $mode     "a" for add chunks, "s" for sub chunks.
 * @param string $listname List name.
 */
function deleteRange($range,$mode,$listname)
{
    $buildtrunk = $listname.'-'.$mode;
    $bounds = explode('-',trim($range),2);
    $from = (int)trim($bounds[0]);
    if(isset($bounds[1]))
    {
        $to = (int)trim($bounds[1]);
        $clause = "`ChunkNum` >= '$from' AND `ChunkNum` <= '$to'";
    }
    else
        $clause = "`ChunkNum` = '$from'";

    //Delete from index
    mysqli_query($this->dbLink, "DELETE FROM `$buildtrunk-index` WHERE $clause");

    //Select all host keys that match chunks (deleted afterwards, but the list is needed first)
    $result = mysqli_query($this->dbLink, "SELECT `Hostkey` FROM `$buildtrunk-hosts` WHERE $clause");
    if(!$result)
        return;
    if(mysqli_num_rows($result)>0)
    {
        $buildprefixdel = array();
        while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
        {
            if(!empty($row['Hostkey']))
                $buildprefixdel[] = mysqli_real_escape_string($this->dbLink,$row['Hostkey']);
        }
        if (count($buildprefixdel)) {
            //Delete all matching hostkey prefixes
            mysqli_query(
                $this->dbLink,
                "DELETE FROM `$buildtrunk-prefixes` WHERE `Hostkey` in ('" . implode('\',\'', $buildprefixdel) . "')"
            );
        }

        //Delete all matching hostkeys
        mysqli_query($this->dbLink, "DELETE FROM `$buildtrunk-hosts` WHERE $clause");
    }
    mysqli_free_result($result);
}
/**
 * Main part of the updater: sends the request body to GSB and applies the
 * response inside a transaction.
 *
 * Response lines handled:
 *   n:SECONDS      next poll interval, stored via setTimeout()
 *   r:pleasereset  wipe all list tables via resetDatabase()
 *   i:LISTNAME     the following lines belong to this list
 *   u:URL          chunk file to download and process
 *   ad:RANGES      add chunks to delete (comma separated)
 *   sd:RANGES      sub chunks to delete (comma separated)
 *
 * Directives are recognised only at the start of a line, so text inside a
 * redirect URL can no longer be mistaken for (or stripped as) a directive.
 *
 * @param string $body Request body as built by formattedRequest().
 * @return bool True on completion; failures terminate through fatalerror().
 */
function getData($body)
{
    if(empty($body))
        $this->fatalerror("Missing a body for data request");
    $this->trans_begin();
    $buildopts = array(CURLOPT_POST=>true,CURLOPT_POSTFIELDS=>$body."\n");
    $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/downloads?client=api&apikey=".$this->apikey."&appver=".$this->version."&pver=".$this->apiversion,$buildopts,"data");
    $response = (string)$result[1];

    //A response without an n: line leaves the delay at 0 instead of
    //reading an undefined match
    $timeout = 0;
    if(preg_match('/^n:\s*(\d+)/m', $response, $match))
        $timeout = (int)$match[1];
    $this->setTimeout($timeout);

    if(strpos($response,'r:pleasereset')!==false)
        $this->resetDatabase();
    else
    {
        $listname = null;
        foreach(explode("\n",$response) as $line)
        {
            $line = trim($line);
            if(strncmp($line,'i:',2)===0)
            {
                $listname = trim((string)substr($line,2));
                continue;
            }
            //Lines before the first list header carry no list data
            if($listname===null||$line==='')
                continue;
            if(strncmp($line,'u:',2)===0)
            {
                $chunkdata = $this->googleDownloader('http://'.trim((string)substr($line,2)),false,"data");
                $this->processChunks($chunkdata[1],$listname);
                $this->outputmsg("Saved a chunk file");
            }
            elseif(strncmp($line,'ad:',3)===0||strncmp($line,'sd:',3)===0)
            {
                //First character of the directive is the table mode (a or s)
                $mode = $line[0];
                foreach(explode(',',trim((string)substr($line,3))) as $range)
                {
                    $this->deleteRange(trim($range),$mode,$listname);
                }
            }
        }
        if($listname===null)
        {
            $this->outputmsg('No data available in list');
        }
    }
    $this->trans_commit();
    return true;
}
/**
 * Shortcut to run the updater: checks the data timeout, builds the request
 * body from every configured list and passes it to getData().
 */
function runUpdate()
{
    $this->checkTimeout('data');

    $require = "";
    foreach($this->usinglists as $listname)
    {
        $line = $this->formattedRequest($listname);
        $require = $require.$line;
    }

    $this->outputmsg("Using $require");
    $this->getData($require);
}
642 | //LOOKUP FUNCTIONS
/*Used to check the canonicalize function.
Runs every input below through Canonicalize() and compares the 'GSBURL'
element of the result with the expected value, reporting PASSED or INVALID
for each case through outputmsg()*/
function validateMethod()
{
    //Input => Expected
    $cases = array(
        "http://host/%25%32%35" => "http://host/%25",
        "http://host/%25%32%35%25%32%35" => "http://host/%25%25",
        "http://host/%2525252525252525" => "http://host/%25",
        "http://host/asdf%25%32%35asd" => "http://host/asdf%25asd",
        "http://host/%%%25%32%35asd%%" => "http://host/%25%25%25asd%25%25",
        "http://www.google.com/" => "http://www.google.com/",
        "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/" => "http://168.188.99.26/.secure/www.ebay.com/",
        "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/" => "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
        "http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B" => 'http://host%23.com/~a!b@c%23d$e%25f^00&11*22(33)44_55+',
        "http://3279880203/blah" => "http://195.127.0.11/blah",
        "http://www.google.com/blah/.." => "http://www.google.com/",
        "www.google.com/" => "http://www.google.com/",
        "www.google.com" => "http://www.google.com/",
        "http://www.evil.com/blah#frag" => "http://www.evil.com/blah",
        "http://www.GOOgle.com/" => "http://www.google.com/",
        "http://www.google.com.../" => "http://www.google.com/",
        "http://www.google.com/foo\tbar\rbaz\n2" => "http://www.google.com/foobarbaz2",
        "http://www.google.com/q?" => "http://www.google.com/q?",
        "http://www.google.com/q?r?" => "http://www.google.com/q?r?",
        "http://www.google.com/q?r?s" => "http://www.google.com/q?r?s",
        "http://evil.com/foo#bar#baz" => "http://evil.com/foo",
        "http://evil.com/foo;" => "http://evil.com/foo;",
        "http://evil.com/foo?bar;" => "http://evil.com/foo?bar;",
        "http://\x01\x80.com/" => "http://%01%80.com/",
        "http://notrailingslash.com" => "http://notrailingslash.com/",
        "http://www.gotaport.com:1234/" => "http://www.gotaport.com:1234/",
        " http://www.google.com/ " => "http://www.google.com/",
        "http:// leadingspace.com/" => "http://%20leadingspace.com/",
        "http://%20leadingspace.com/" => "http://%20leadingspace.com/",
        "%20leadingspace.com/" => "http://%20leadingspace.com/",
        "https://www.securesite.com/" => "https://www.securesite.com/",
        "http://host.com/ab%23cd" => "http://host.com/ab%23cd",
        "http://host.com//twoslashes?more//slashes" => "http://host.com/twoslashes?more//slashes"
    );
    foreach($cases as $key=>$value)
    {
        //Only the 'GSBURL' element of the Canonicalize() result is compared
        $canit = $this->Canonicalize($key);
        $canit = $canit['GSBURL'];
        if($canit==$value)
            $this->outputmsg("PASSED: $key");
        else
            $this->outputmsg("INVALID:
ORIGINAL: $key
EXPECTED: $value
RECIEVED: $canit
");

    }
}
693 | /*Special thanks Steven Levithan (stevenlevithan.com) for the ridiculously complicated regex
694 | required to parse urls. This is used over parse_url as it robustly provides access to
695 | port, userinfo etc and handles mangled urls very well.
696 | Expertly integrated into phpGSB by Sam Cleaver ;)
697 | Thanks to mikegillis677 for finding the seg. fault issue in the old function.
698 | Passed validateMethod() check on 17/01/12*/
/**
 * Splits a URL into its components using Steven Levithan's parseUri regexes.
 *
 * The "loose" pattern is tried first; if it yields nothing the "strict"
 * pattern is used as a fallback.
 *
 * @param string $url URL to split.
 * @return array Map with the keys source, scheme, authority, userinfo, user,
 *               password, host, port, relative, path, drive, directory, file,
 *               query and fragment. Components that were not captured are ''.
 */
function j_parseUrl($url)
{
    $strict = '/^(?:([^:\/?#]+):)?(?:\/\/\/?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?(((?:\/(\w:))?((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/';
    $loose = '/^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/\/?)?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?)(((?:\/(\w:))?(\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/';

    //Capture group N of either pattern maps to $componentNames[N]
    $componentNames = array(
        'source', 'scheme', 'authority', 'userinfo', 'user', 'password', 'host', 'port',
        'relative', 'path', 'drive', 'directory', 'file', 'query', 'fragment'
    );
    $parts = array_fill_keys($componentNames, '');

    preg_match($loose, $url, $captures);
    if (empty($captures)) {
        //Odd as it sounds, strict is technically more correct and may salvage completely mangled urls
        preg_match($strict, $url, $captures);
    }

    //preg_match() omits trailing groups that did not take part, so only copy what is there
    foreach ($captures as $index => $text) {
        if (isset($componentNames[$index])) {
            $parts[$componentNames[$index]] = $text;
        }
    }
    return $parts;
}
730 | /*Regex to check if its a numerical IP address*/
/**
 * Tests whether a string is a dotted-decimal IPv4 address.
 *
 * The first octet must be 1-255 (a leading 0 octet is rejected), the other
 * three 0-255, and no octet may carry leading zeros.
 *
 * @param string $ip Candidate address.
 * @return int|false 1 on match, 0 otherwise (preg_match() result).
 */
function is_ip($ip)
{
    //The D modifier makes "$" match only at the very end; without it a
    //trailing newline ("1.2.3.4\n") would still be accepted as an IP.
    return preg_match(
        '/^([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])' .
        '(\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}$/D',
        $ip
    );
}
736 | /*Checks if input is in hex format*/
/**
 * Heuristic check that a string is a bare (no "0x") lowercase hex number.
 *
 * Legacy rule kept on purpose: the string's leading decimal value must be 0,
 * i.e. it has to start with a letter a-f (or consist of zeros). A string that
 * starts with a digit 1-9 is treated as decimal and rejected, which is how
 * callers tell a decimal DWORD such as "3279880203" from a hex one.
 *
 * The original computed this with `$x + 3`, which throws a TypeError on
 * PHP 8 for non-numeric strings; a cast is used instead. Input is validated
 * up front because hexdec() silently ignores non-hex characters.
 *
 * @param string $x Candidate hex digits.
 * @return bool True when $x round-trips through hexdec()/dechex().
 */
function is_hex($x)
{
    $x = (string)$x;
    //Anything that is not purely hex digits can never round-trip
    if (!preg_match('/^[0-9a-fA-F]+$/D', $x))
        return false;
    //Leading numeric value must be zero (see legacy rule above)
    if ((float)$x != 0)
        return false;
    $decimal = hexdec($x);
    //hexdec() returns a float once the value no longer fits an int; dechex() cannot take that
    if (!is_int($decimal))
        return false;
    return dechex($decimal) == $x;
}
744 | /*Checks if input is in octal format*/
/**
 * Checks whether an IP address component is written in octal notation.
 *
 * Relies on the fact that in IP addressing an octal component must begin
 * with a 0. The comparison is strict: the former loose `== 0` also accepted
 * empty strings and (before PHP 8) any string starting with a letter.
 *
 * @param string $x Candidate component.
 * @return bool True when the first character is the digit 0.
 */
function is_octal($x)
{
    return substr((string)$x, 0, 1) === '0';
}
750 | /*Converts hex or octal input into decimal */
/**
 * Converts one IP address component given in hex ("0x..") or octal ("0..")
 * notation to its decimal value.
 *
 * Previously the hex branch could never be taken (is_hex() rejects anything
 * carrying the "0x" prefix), so hex components fell through to octdec() and
 * produced garbage. Hex and octal digits are now validated explicitly.
 *
 * @param string $value Component to convert.
 * @return int|float|false Decimal value, or false when $value is neither
 *                         valid hex nor valid octal. Note that a valid "0"
 *                         yields int 0, so compare the result with === false.
 */
function hexoct2dec($value)
{
    $value = (string)$value;
    //As this deals with parts in IP's we can be more exclusive
    if (preg_match('/^0x([0-9a-f]+)$/iD', $value, $hexdigits))
    {
        return hexdec($hexdigits[1]);
    }
    //octdec() ignores invalid digits, so insist on 0-7 only
    if ($this->is_octal($value) && preg_match('/^[0-7]+$/D', $value))
    {
        return octdec($value);
    }
    return false;
}
765 | /*Converts IP address part in HEX to decimal*/
/**
 * Converts a hex IP address part to decimal.
 *
 * Every "0x" occurrence is removed and leading zeros are stripped before
 * the remainder is passed to hexdec().
 *
 * @param string $hex Hex digits, optionally prefixed with "0x".
 * @return int|float Decimal value as returned by hexdec().
 */
function iphexdec($hex)
{
    $withoutPrefix = str_replace('0x', '', $hex);
    return hexdec(ltrim($withoutPrefix, "0"));
}
773 | /*Converts full IP address in HEX to decimal*/
/**
 * Converts a whole IP address written as one hex number into dotted form.
 *
 * The "0x" marker and leading zeros are dropped, the remainder must pass
 * is_hex(), only the last eight hex digits are kept and shorter input is
 * left-padded with zeros. The four resulting octets must pass is_ip().
 *
 * @param string $hex Hex representation of the address.
 * @return string|false Dotted-decimal address, or false when the input is
 *                      not hex or does not form a valid IP.
 */
function hexIPtoIP($hex)
{
    //Hex identifier and leading 0's are not significant
    $digits = ltrim(str_replace('0x', '', $hex), "0");
    if (!$this->is_hex($digits))
        return false;

    //There may be a load of junk before the part we need
    if (strlen($digits) > 8)
        $digits = substr($digits, -8);
    //Normalise to exactly eight digits, two per octet
    $digits = str_pad($digits, 8, '0', STR_PAD_LEFT);

    $octets = array();
    for ($offset = 0; $offset < 8; $offset += 2)
        $octets[] = hexdec(substr($digits, $offset, 2));
    $candidate = implode('.', $octets);

    return $this->is_ip($candidate) ? $candidate : false;
}
801 | /*Checks if an IP provided in either hex, octal or decimal is in fact
802 | an IP address. Normalises to a four part IP address.*/
/**
 * Checks whether a hostname is an IP address in any of the usual encodings
 * and normalises it to four dot-separated decimal octets.
 *
 * Tried in order:
 *  1. plain dotted-decimal (is_ip());
 *  2. the whole address as one hex number (hexIPtoIP());
 *  3. one to four dot-separated components, each decimal, octal ("0..") or
 *     hex ("0x.."), where the last component fills all remaining bytes
 *     (so "3279880203", "195.8323083" and "0xc3.0177.0.013" all work).
 *
 * Step 3 replaces the former DWORD/mixed handling, which called dechex() on
 * non-numeric hosts, tested exploded strings with is_int() (always false),
 * rejected any component equal to 0 and read an undefined offset for
 * single-label hosts.
 *
 * @param string $ip Hostname to test.
 * @return string|false Normalised address, or false when not an IP.
 */
function isValid_IP($ip)
{
    $ip = (string)$ip;

    //First do a simple check, if it passes this no more needs to be done
    if ($this->is_ip($ip))
        return $ip;

    //Perhaps its all in hex?
    $checkhex = $this->hexIPtoIP($ip);
    if ($checkhex)
        return $checkhex;

    //Otherwise parse it component by component
    $components = explode('.', $ip);
    if (count($components) > 4)
        return false;

    $numbers = array();
    foreach ($components as $component)
    {
        if (preg_match('/^0x([0-9a-f]+)$/iD', $component, $hexdigits))
            $numbers[] = hexdec($hexdigits[1]);
        elseif (preg_match('/^0[0-7]*$/D', $component))
            $numbers[] = octdec($component);
        elseif (preg_match('/^[1-9][0-9]*$/D', $component))
            //Float keeps 32-bit values intact even where int is only 32 bits wide
            $numbers[] = (float)$component;
        else
            return false;
    }

    //The last component covers every byte not named explicitly
    $last = array_pop($numbers);
    $tailBytes = 4 - count($numbers);
    if ($last >= pow(256, $tailBytes))
        return false;

    $octets = array();
    foreach ($numbers as $number)
    {
        if ($number > 255)
            return false;
        $octets[] = (int)$number;
    }

    //Peel the last component apart, least significant byte first
    $tail = array();
    for ($i = 0; $i < $tailBytes; $i++)
    {
        array_unshift($tail, (int)fmod($last, 256));
        $last = floor($last / 256);
    }

    $newip = implode('.', array_merge($octets, $tail));
    if ($this->is_ip($newip))
        return $newip;

    //Well its not an IP that we can recognise... theres only so much we can do!
    return false;
}
/*Had to write another layer because PHP's built-in urlencode() escapes all
non-alphanumeric characters, whereas Google states to only URL-encode
characters at or below ASCII 32 or at or above ASCII 127 (some characters in
between are non-alphanumeric, so urlencode() on its own won't work).*/
/**
 * Percent-encodes only the bytes Google's canonicalization asks for.
 *
 * A byte is escaped with rawurlencode() when its ordinal is <= 32 or >= 127,
 * when it is '%', or when it is '#' and $ignorehash is false. Everything
 * else is copied through unchanged.
 *
 * @param string $url        Text to encode.
 * @param bool   $ignorehash When true, '#' is left as it is.
 * @return string Encoded text ($url itself when it contains no bytes).
 */
function flexURLEncode($url,$ignorehash=false)
{
    $bytes = preg_split('//', $url, -1, PREG_SPLIT_NO_EMPTY);
    if (count($bytes) <= 0)
        return $url;

    $encoded = '';
    foreach ($bytes as $byte)
    {
        $ordinal = ord($byte);
        $mustEscape = $ordinal <= 32
            || $ordinal >= 127
            || ($byte == '#' && !$ignorehash)
            || $byte == '%';
        $encoded .= $mustEscape ? rawurlencode($byte) : $byte;
    }
    return $encoded;
}
901 | /*Canonicalize a full URL according to Google's definition.*/
/**
 * Canonicalizes a full URL according to Google's definition.
 *
 * Steps: strip tab/LF/CR/VT and surrounding whitespace, escape the URL so it
 * can be parsed, split it with j_parseUrl(), repeatedly percent-decode host,
 * path and query until they stop changing, normalise the host (dots, case,
 * IP forms), resolve "." / ".." / empty path segments, then re-escape.
 *
 * Fixes over the previous version: a path segment "0" (and a port or
 * fragment of "0") is no longer dropped by empty(), and consecutive ".."
 * segments now each remove one real parent segment.
 *
 * @param string $url URL to canonicalize.
 * @return array 'GSBURL'   => canonical URL without userinfo/fragment,
 *               'CleanURL' => same but with userinfo and fragment kept,
 *               'Parts'    => array('Host', 'Path', 'Query', 'IP' (bool)).
 */
function Canonicalize($url)
{
    //Remove line feeds, return carriages, tabs, vertical tabs
    $finalurl = trim(str_replace(array("\x09","\x0A","\x0D","\x0B"),'',$url));
    //URL Encode for easy extraction
    $finalurl = $this->flexURLEncode($finalurl,true);
    //Now extract hostname & path
    $parts = $this->j_parseUrl($finalurl);
    $hostname = $parts['host'];
    $path = $parts['path'];
    $query = $parts['query'];

    //Remove all hex coding (capped at 50 rounds to stop craziness but should never reach that)
    for ($i = 0; $i < 50; $i++)
    {
        $lasthost = $hostname;
        $lastpath = $path;
        $lastquery = $query;
        $hostname = rawurldecode($hostname);
        $path = rawurldecode($path);
        $query = rawurldecode($query);
        if ($hostname === $lasthost && $path === $lastpath && $query === $lastquery)
            break;
    }

    //Deal with hostname first: drop leading/trailing dots, collapse runs of dots, lowercase
    $hostname = trim($hostname,'.');
    $hostname = preg_replace("/\.{2,}/",".",$hostname);
    $hostname = strtolower($hostname);
    //See if its a valid IP (the developer guide lowercases after this, but this order is more efficient)
    $hostnameip = $this->isValid_IP($hostname);
    if ($hostnameip)
    {
        $usingip = true;
        $usehost = $hostnameip;
    }
    else
    {
        $usingip = false;
        $usehost = $hostname;
    }

    //Now we move onto canonicalizing the path
    $hadTrailingSlash = (substr($path,-1,1) == "/");
    $segments = array();
    foreach (explode('/',$path) as $segment)
    {
        //Compare against '' explicitly: empty() would also discard a "0" segment
        if ($segment === '' || $segment === '.')
            continue;
        if ($segment === '..')
        {
            //Drop the nearest surviving parent; at the root there is nothing to drop
            array_pop($segments);
            continue;
        }
        $segments[] = $segment;
    }
    $path = "/".implode("/",$segments);
    if ($hadTrailingSlash && substr($path,-1,1) != "/")
        $path .= "/";

    $usehost = $this->flexURLEncode($usehost);
    $path = $this->flexURLEncode($path);
    $query = $this->flexURLEncode($query);

    if ($parts['scheme'] === '')
        $parts['scheme'] = 'http';
    $canurl = $parts['scheme'].'://';
    $realurl = $canurl;
    if ($parts['userinfo'] !== '')
        $realurl .= $parts['userinfo'].'@';
    $canurl .= $usehost;
    $realurl .= $usehost;
    if ($parts['port'] !== '')
    {
        $canurl .= ':'.$parts['port'];
        $realurl .= ':'.$parts['port'];
    }
    $canurl .= $path;
    $realurl .= $path;
    //The query is appended as parsed (not the decoded copy) whenever the URL contains a "?"
    if (substr_count($finalurl,"?") > 0)
    {
        $canurl .= '?'.$parts['query'];
        $realurl .= '?'.$parts['query'];
    }
    if ($parts['fragment'] !== '')
        $realurl .= '#'.$parts['fragment'];

    return array(
        "GSBURL"=>$canurl,
        "CleanURL"=>$realurl,
        "Parts"=>array("Host"=>$usehost,"Path"=>$path,"Query"=>$query,"IP"=>$usingip)
    );
}
1000 | /*SHA-256 input (short method).*/
/**
 * Returns the SHA-256 digest of $data as a lowercase hex string.
 *
 * @param string $data Bytes to hash.
 * @return string 64 hex characters.
 */
function sha256($data)
{
    $digest = hash('sha256', $data);
    return $digest;
}
1005 | /*Make Hostkeys for use in a lookup*/
/**
 * Builds the host keys used for the first lookup stage.
 *
 * For an IP, or a host with at most two labels, the only key is "<host>/".
 * For longer host names two keys are produced: the last three labels and
 * the last two labels, each followed by "/".
 *
 * @param string $host    Canonical host name or IP.
 * @param bool   $usingip True when $host is an IP address.
 * @return array Map of full SHA-256 hash => array('Host', 'Prefix' (first
 *               8 hex chars of the hash), 'Hash').
 */
function makeHostKey($host,$usingip)
{
    $candidates = array($host."/");
    if (!$usingip)
    {
        $labels = explode(".",$host);
        if (count($labels) > 2)
        {
            $candidates = array(
                implode('.',array_slice($labels,-3))."/",
                implode('.',array_slice($labels,-2))."/"
            );
        }
    }

    //Now make key & key prefix
    $keyed = array();
    foreach ($candidates as $candidate)
    {
        $digest = $this->sha256($candidate);
        $keyed[$digest] = array("Host"=>$candidate,"Prefix"=>substr($digest,0,8),"Hash"=>$digest);
    }
    return $keyed;
}
/*Hash up a list of values from makePrefixes() (will possibly be
combined into that function at a later date).*/
/**
 * Hashes every entry of a list of URL expressions.
 *
 * @param array $prefixarray Strings to hash.
 * @return array|false Map of full SHA-256 hash => array('Original',
 *                     'Prefix' (first 8 hex chars), 'Hash'), or false when
 *                     the list is empty.
 */
function makeHashes($prefixarray)
{
    if (count($prefixarray) <= 0)
        return false;

    $hashed = array();
    foreach ($prefixarray as $expression)
    {
        $digest = $this->sha256($expression);
        $hashed[$digest] = array("Original"=>$expression,"Prefix"=>substr($digest,0,8),"Hash"=>$digest);
    }
    return $hashed;
}
1051 | /*Make URL prefixes for use after a hostkey check*/
/**
 * Builds and hashes the host/path combinations to check after a hostkey hit.
 *
 * Hosts: the exact host plus, for non-IP hosts, the names made from the last
 * five (or fewer) labels down to the last two. Paths, per host: the full
 * path with query (if any), the full path, and up to four leading path
 * prefixes starting at "/".
 *
 * The query test is now an explicit empty-string comparison; empty() used
 * to skip the "?query" variation when the query was the string "0".
 *
 * @param string $host    Canonical host name or IP.
 * @param string $path    Canonical path (expected to start with "/").
 * @param string $query   Canonical query string without "?", may be ''.
 * @param bool   $usingip True when $host is an IP address.
 * @return array|false Result of makeHashes() over the unique combinations.
 */
function makePrefixes($host,$path,$query,$usingip)
{
    //Exact hostname in the url always comes first
    $hostcombos = array($host);
    if (!$usingip)
    {
        $labels = explode('.',$host);
        //Start from the last five labels and successively drop the leading one
        for ($keep = min(5,count($labels)); $keep > 1; $keep--)
            $hostcombos[] = implode('.',array_slice($labels,-$keep));
    }
    $hostcombos = array_unique($hostcombos);

    $hasPath = ((string)$path !== '');
    $hasQuery = ((string)$query !== '');
    $pathparts = $hasPath ? explode("/",$path) : array();
    $upperlimit = min(4,count($pathparts));
    $lastindex = count($pathparts) - 1;

    $variations = array();
    foreach ($hostcombos as $hostcombo)
    {
        if ($hasQuery)
            $variations[] = $hostcombo.$path.'?'.$query;
        $variations[] = $hostcombo.$path;

        $partial = "";
        for ($i = 0; $i < $upperlimit; $i++)
        {
            $partial .= $pathparts[$i];
            //Every component except the very last one of the path is a directory
            if ($i != $lastindex)
                $partial .= "/";
            $variations[] = $hostcombo.$partial;
        }
    }
    return $this->makeHashes(array_unique($variations));
}
1109 | /*Process data provided from the response of a full-hash GSB
1110 | request*/
/**
 * Parses the body of a full-hash response.
 *
 * The body is a sequence of records, each a header line
 * "LISTNAME:ADDCHUNK:LENGTH\n" followed by LENGTH bytes of binary hash data
 * (consecutive 32-byte hashes, stored here as 64 hex characters).
 *
 * Parsing stops at the first malformed header instead of reading undefined
 * offsets or passing a non-numeric length to substr().
 *
 * NOTE(review): as before, only the last hash of each list/chunk pair is
 * kept because the result holds a single string per pair; callers rely on
 * that shape.
 *
 * @param string $data Raw response body.
 * @return array Map of list name => (add chunk number => hex full hash).
 */
function processFullLookup($data)
{
    $extracthash = array();
    $remaining = (string)$data;
    while (strlen($remaining) > 0)
    {
        $splithead = explode("\n",$remaining,2);
        $chunkinfo = explode(':',$splithead[0]);
        //A usable record needs a header line, a body and a numeric length
        if (count($splithead) < 2 || count($chunkinfo) < 3 || !preg_match('/^[0-9]+$/D',$chunkinfo[2]))
            break;

        $listname = $chunkinfo[0];
        $addchunk = $chunkinfo[1];
        $chunklen = (int)$chunkinfo[2];

        $chunkdata = bin2hex(substr($splithead[1],0,$chunklen));
        while (strlen($chunkdata) > 0)
        {
            $extracthash[$listname][$addchunk] = substr($chunkdata,0,64);
            $chunkdata = substr($chunkdata,64);
        }
        //Whatever follows this record's data is the next record
        $remaining = (string)substr($splithead[1],$chunklen);
    }
    return $extracthash;
}
1132 | /*Add a full-hash key to a prefix or hostkey (the variable is $prefix but it could
1133 | be either).*/
/**
 * Caches a full-length hash against a stored hostkey or prefix.
 *
 * The add-hosts table of $listname is searched first for rows whose Hostkey
 * equals $prefix in chunk $chunknum; rows without a FullHash get it set.
 * If no such rows exist, $prefix is looked up in the add-prefixes table and
 * the hash is stored on prefix rows whose host row (same chunk) has a
 * Count above zero. fatalerror() is called when an UPDATE fails.
 *
 * Failed SELECTs are now skipped instead of being passed to
 * mysqli_fetch_array(), which raises a TypeError on PHP 8.
 *
 * NOTE(review): values are interpolated into SQL unescaped; they are
 * presumably hex strings and chunk numbers from the database and the GSB
 * response - confirm before feeding anything else in.
 *
 * @param string $prefix   Hostkey or prefix (hex).
 * @param mixed  $chunknum Add chunk number the entry belongs to.
 * @param string $fullhash Full-length hash (hex) to cache.
 * @param string $listname GSB list name.
 * @return void
 */
function addFullHash($prefix,$chunknum,$fullhash,$listname)
{
    $buildtrunk = $listname."-a";

    //First check hosts
    $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '$prefix' AND `Chunknum` = '$chunknum'");
    if ($result && mysqli_num_rows($result) > 0)
    {
        while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
        {
            if (!empty($row['FullHash']))
                continue;
            //We've got a live one! Insert the full hash for it
            $addresult = mysqli_query($this->dbLink, "UPDATE `$buildtrunk-hosts` SET `FullHash` = '$fullhash' WHERE `ID` = '{$row['ID']}';");
            if (!$addresult)
                $this->fatalerror("Could not cache full-hash key. $prefix, $chunknum, $fullhash, $listname");
        }
        return;
    }

    //If there are no rows it must be a prefix
    $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-prefixes` WHERE `Prefix` = '$prefix'");
    if (!$result)
        return;
    while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
    {
        if (!empty($row['FullHash']))
            continue;
        $resulttwo = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '{$row['Hostkey']}' AND `Chunknum` = '$chunknum'");
        if (!$resulttwo)
            continue;
        while ($rowtwo = mysqli_fetch_array($resulttwo, MYSQLI_ASSOC))
        {
            //Only hosts that actually carry prefixes qualify
            if (hexdec($rowtwo['Count']) > 0)
            {
                $addresult = mysqli_query($this->dbLink, "UPDATE `$buildtrunk-prefixes` SET `FullHash` = '$fullhash' WHERE `ID` = '{$row['ID']}';");
                if (!$addresult)
                    $this->fatalerror("Could not cache full-hash key. $prefix, $chunknum, $fullhash, $listname");
            }
        }
    }
}
1175 | /*Check database for any cached full-length hashes for a given prefix.*/
/**
 * Looks for a cached full-length hash for a hostkey or prefix.
 *
 * For each list in $this->usinglists the add-hosts table is checked first
 * (Hostkey = $prefix with a non-empty FullHash). Otherwise the add-prefixes
 * table is checked, and a hit is returned only when a host row for the
 * prefix's Hostkey has a Count above zero.
 *
 * A failed inner SELECT is now skipped rather than handed to
 * mysqli_fetch_array(), which raises a TypeError on PHP 8.
 *
 * @param string $prefix Hostkey or prefix (hex).
 * @return array|false array(FullHash, Chunknum) of the first hit, or false.
 */
function cacheCheck($prefix)
{
    foreach ($this->usinglists as $value)
    {
        $buildtrunk = $value."-a";
        $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '$prefix' AND `FullHash` != ''");
        if ($result && mysqli_num_rows($result) > 0)
        {
            //The first cached host row wins
            $row = mysqli_fetch_array($result, MYSQLI_ASSOC);
            if ($row)
                return array($row['FullHash'],$row['Chunknum']);
            continue;
        }

        $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-prefixes` WHERE `Prefix` = '$prefix' AND `FullHash` != ''");
        if (!$result || mysqli_num_rows($result) <= 0)
            continue;
        while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
        {
            $resulttwo = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '{$row['Hostkey']}'");
            if (!$resulttwo)
                continue;
            while ($rowtwo = mysqli_fetch_array($resulttwo, MYSQLI_ASSOC))
            {
                if (hexdec($rowtwo['Count']) > 0)
                    return array($row['FullHash'],$rowtwo['Chunknum']);
            }
        }
    }
    return false;
}
1211 | /*Do a full-hash lookup based on prefixes provided, returns (bool) true
1212 | on a match and (bool) false on no match.*/
/**
 * Performs a full-hash lookup for a set of matched prefixes.
 *
 * Each prefix is first checked against the local cache; a cached hash that
 * is one of $originals, from the same chunk and with the same prefix, is an
 * immediate match. Otherwise the prefixes are posted to the GSB gethash
 * endpoint and the returned hashes are compared the same way; a match is
 * cached through addFullHash().
 *
 * @param array $prefixes  List of array(prefix hex, chunk number), indexed from 0.
 * @param array $originals Map of full hash => array(..., 'Prefix' => ...) for the URL.
 * @return bool|null True on a match, false on no match; fatalerror() is
 *                   called for any other response.
 */
function doFullLookup($prefixes,$originals)
{
    //Untouched copy used for the chunk comparisons below
    $requested = $prefixes;
    //They should really all have the same prefix size.. we'll just check one
    $prefixsize = strlen($prefixes[0][0])/2;
    $length = count($prefixes)*$prefixsize;

    $packed = array();
    foreach ($prefixes as $key=>$entry)
    {
        //Check cache on each iteration (we can return true earlier if we get a match!)
        $cached = $this->cacheCheck($entry[0]);
        if ($cached && isset($originals[$cached[0]]))
        {
            foreach ($requested as $candidate)
            {
                //Must come from the same chunk and carry the same prefix
                if ($candidate[1]==$cached[1] && $entry[0]==$originals[$cached[0]]['Prefix'])
                    return true;
            }
        }
        $packed[$key] = pack("H*",$entry[0]);
    }

    //No cache matches so we continue with request
    $body = "$prefixsize:$length\n".implode("",$packed);
    $buildopts = array(CURLOPT_POST=>true,CURLOPT_POSTFIELDS=>$body);
    $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/gethash?client=api&apikey=".$this->apikey."&appver=".$this->version."&pver=".$this->apiversion,$buildopts,"lookup");
    $httpcode = $result[0]['http_code'];

    if ($httpcode==200 && !empty($result[1]))
    {
        $extractedhashes = $this->processFullLookup($result[1]);
        foreach ($extractedhashes as $listname=>$chunks)
        {
            foreach ($chunks as $chunknum=>$fullhash)
            {
                if (!isset($originals[$fullhash]))
                    continue;
                //It matches a full-hash we have, now check they're from the same chunks
                foreach ($requested as $candidate)
                {
                    if ($candidate[1]==$chunknum && $candidate[0]==$originals[$fullhash]['Prefix'])
                    {
                        //Add full hash to database (cache)
                        $this->addFullHash($candidate[0],$candidate[1],$fullhash,$listname);
                        return true;
                    }
                }
            }
        }
        return false;
    }

    //204 Means no match
    if ($httpcode==204 && strlen($result[1])==0)
        return false;

    //"No No No! This just doesn't add up at all!"
    $this->fatalerror("ERROR: Invalid response returned from GSB ($httpcode)");
}
1288 | /*Checks to see if a match for a prefix is found in the sub table, if it is then we won't do
1289 | a full-hash lookup. Return true on match in sub list, return false on negative.*/
/**
 * Checks whether an add entry is cancelled out by the sub list.
 *
 * In "prefix" mode each entry's value is matched against the Prefix column
 * of the sub-prefixes table; in "hostkey" mode against the Hostkey column,
 * and the row must additionally have an empty Prefix. In both modes the
 * row's AddChunkNum (hex) has to equal the entry's chunk number. Any other
 * mode is reported through fatalerror().
 *
 * @param string $listname   GSB list name.
 * @param array  $prefixlist List of array(prefix or hostkey, add chunk number).
 * @param string $mode       "prefix" or "hostkey".
 * @return bool|null True when a sub entry matches, false when none does.
 */
function subCheck($listname,$prefixlist,$mode)
{
    $table = $listname.'-s';
    $byPrefix = ($mode=="prefix");
    $byHostkey = !$byPrefix && ($mode=="hostkey");
    if (!$byPrefix && !$byHostkey)
    {
        $this->fatalerror("Invalid SubCheck Mode $mode");
        return;
    }

    foreach ($prefixlist as $entry)
    {
        //Prefix mode only needs the prefix column (saves a lookup)
        if ($byPrefix)
            $sql = "SELECT * FROM `{$table}-prefixes` WHERE `Prefix` = '{$entry[0]}'";
        else
            $sql = "SELECT * FROM `{$table}-prefixes` WHERE `Hostkey` = '{$entry[0]}'";

        $result = mysqli_query($this->dbLink, $sql);
        if (!$result || mysqli_num_rows($result) <= 0)
            continue;

        //As interpreted from Developer Guide a match in the sub list cancels out the add listing;
        //we double check its from the same chunk just to be pedantic
        while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
        {
            if (hexdec($row['AddChunkNum']) != $entry[1])
                continue;
            if ($byPrefix || empty($row['Prefix']))
                return true;
        }
    }
    return false;
}
1335 | /*Does a full URL lookup on given lists, will check if its in database, if slight match there then
1336 | will do a full-hash lookup on GSB, returns (bool) true on match and (bool) false on negative.*/
/**
 * Looks a URL up against every list in $this->usinglists.
 *
 * The URL is canonicalized and its host keys are searched in each list's
 * add-hosts table. For a host row with a Count above zero the URL's prefixes
 * are matched in the add-prefixes table; for a Count of zero the host key
 * itself is the candidate. Candidates not cancelled by subCheck() are
 * confirmed with doFullLookup().
 *
 * Changes: unused locals removed, and the prefix query is skipped when
 * makePrefixes() yields nothing (it previously produced "WHERE ()" SQL).
 *
 * @param string $url URL to check.
 * @return bool True when the URL is listed, false otherwise.
 */
function doLookup($url)
{
    //First canonicalize the URL
    $canurl = $this->Canonicalize($url);
    $urlparts = $canurl['Parts'];
    //Make hostkeys
    $hostkeys = $this->makeHostKey($urlparts['Host'],$urlparts['IP']);
    //Prefixes and their SQL condition are built lazily, once, on the first host hit that needs them
    $prefixes = null;
    $buildprequery = '';

    foreach ($this->usinglists as $value)
    {
        $buildtrunk = $value.'-a';
        foreach ($hostkeys as $valueinner)
        {
            $result = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '{$valueinner['Prefix']}'");
            if (!$result || mysqli_num_rows($result) <= 0)
                continue;

            //For each hostkey match
            while ($row = mysqli_fetch_array($result, MYSQLI_ASSOC))
            {
                if (hexdec($row['Count']) > 0)
                {
                    //The host carries prefixes, so one of ours has to match as well
                    if ($prefixes === null)
                    {
                        $prefixes = $this->makePrefixes($urlparts['Host'],$urlparts['Path'],$urlparts['Query'],$urlparts['IP']);
                        if (!is_array($prefixes))
                            $prefixes = array();
                        $conditions = array();
                        foreach ($prefixes as $prefix)
                            $conditions[] = " `Prefix` = '{$prefix['Prefix']}' ";
                        $buildprequery = implode("OR",$conditions);
                    }
                    if ($buildprequery === '')
                        continue;

                    //Check if there are any matching prefixes
                    $resulttwo = mysqli_query($this->dbLink, "SELECT * FROM `$buildtrunk-prefixes` WHERE ($buildprequery) AND `Hostkey` = '{$row['Hostkey']}'");
                    if (!$resulttwo || mysqli_num_rows($resulttwo) <= 0)
                        continue;

                    $prematches = array();
                    while ($rowtwo = mysqli_fetch_array($resulttwo, MYSQLI_ASSOC))
                        $prematches[] = array($rowtwo['Prefix'],$row['Chunknum']);

                    //Before we send off any requests first check whether its in sub table
                    if (!$this->subCheck($value,$prematches,"prefix") && $this->doFullLookup($prematches,$prefixes))
                        return true;
                }
                else
                {
                    //Count is 0, so the entire domain could be a match
                    $hostmatch = array(array($row['Hostkey'],$row['Chunknum']));
                    if (!$this->subCheck($value,$hostmatch,"hostkey") && $this->doFullLookup($hostmatch,$hostkeys))
                        return true;
                }
            }
        }
    }
    return false;
}
1414 | }
1415 |
--------------------------------------------------------------------------------