├── data ├── whiteip.txt ├── spamvertignore.txt ├── bayesignore.txt ├── scalpel.txt └── blockisp.txt ├── admin ├── tpl │ ├── exception.inc │ ├── scorexml.inc │ ├── plonker_blocked.inc │ ├── tables.inc │ ├── bayes.inc │ ├── bayeslinks.inc │ ├── apc.inc │ ├── test.inc │ ├── bayeskill.inc │ ├── updatebayes.inc │ ├── daily.inc │ ├── stats.inc │ ├── proxies.inc │ ├── bayesadd.inc │ ├── accs.inc │ ├── main.inc │ ├── accmsg.inc │ ├── layout.inc │ ├── bayesinfo.inc │ ├── live.inc │ └── plonker.inc ├── .htaccess ├── phpinfo.php ├── debug.php ├── autologin.php ├── bayeslinks.php ├── apc.php ├── daily.php ├── classifyips.php ├── bayesadd.php ├── test.php ├── postformatter.php ├── proxies.php ├── cleanup.php ├── live.php ├── main.php ├── score.php ├── stats.php ├── updatebayes.php ├── accs.php ├── plonker.php └── index.php ├── README.md ├── class ├── sblamservices.php ├── sblamtest.php ├── sblambasepost.php ├── sblampdo.php ├── domainmatch.php ├── sblamhttp.php ├── plonker.php ├── asyncsocket.php ├── sblambase.php ├── interfaces.php ├── sblam.php ├── sblampost.php └── asyncdns.php ├── tests ├── optimist.php ├── latenight.php ├── hashes.php ├── mixedformatting.php ├── whiteip.php ├── scalpel.php ├── plonker.php ├── sorbs.php ├── phphttpbl.php ├── linkmania.php ├── surbl.php ├── correctfields.php ├── domains.php ├── networks.php ├── keywords.php ├── dronebl.php ├── dnsbl.php ├── dedupe.php ├── linksleeve.php ├── mailexploit.php ├── challenge.php ├── http.php ├── throttle.php ├── spamvertises.php └── bayes.php ├── config.ini ├── dbconn.php └── index.php /data/whiteip.txt: -------------------------------------------------------------------------------- 1 | 89.239.105.61 #biuro Koszalin -------------------------------------------------------------------------------- /admin/tpl/exception.inc: -------------------------------------------------------------------------------- 1 | 2 |
Used ${info/num_entries} of ${info/num_slots} slots, ${info/expunges} expunges, ${info/num_hits} hits / ${info/num_misses} 7 | misses.
8 |Allocated ${sma/num_seg} * ${sma/seg_size}, unused ${sma/avail_mem}
9 |is spam vs 11 | ham.
12 | 13 | 14 |normalized spam vs ham. ${spammy}% spammy
15 | 16 | -------------------------------------------------------------------------------- /tests/latenight.php: -------------------------------------------------------------------------------- 1 | getPostTime()) 9 | { 10 | $hour = date("G",$t); 11 | if ($hour >= 2 and $hour <= 5) return array(0.15,self::CERTAINITY_LOW,"Late-night posting ({$hour}h)"); 12 | if ($hour >= 1 and $hour <= 7) return array(0.09,self::CERTAINITY_LOW,"Late-night posting ({$hour}h)"); 13 | } 14 | } 15 | 16 | 17 | static function info() 18 | { 19 | return array( 20 | 'name'=>'Late-night posting', 21 | 'desc'=>'Bots spam 24h/day, but humans usually don\'t', 22 | 'remote'=>false, 23 | ); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /admin/tpl/updatebayes.inc: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 9 | 10 | 13 | 14 |Processed ${done} posts. Failed to process ${failed} posts.
17 |Because of server load pauses between posts took ${waited} seconds, which is ${waitperpost}s per post.
18 || 4 | ${r/hour} | 5 | 6 |
| ${php:date('H i s',r['timestamp'])} | 11 |
| ${l/time}s | ${l/percent}% | 9 |
|---|
| Number of posts: | ${total} (${tempo}/day) |
|---|---|
| Unverified posts: | ${unverified} (${tough} tough ones) |
| Unadded posts: | ${unadded} unadded |
| Hams: | ${hams} (${hamsprc}%) |
| false negatives | ${fhams} (${phams}%) |
| Spams: | ${spams} (${spamsprc}%) |
| false positives | ${fspams} (${pspams}%) |
| Accuracy: | ${accuracy}% (${unsure}% unsure) |
To be trusted, a proxy must pass an X-Forwarded-For header (or similar) and must have a reverse DNS name that is on this list.
Added words
7 |Added links:
Links scored ${php:round(spamvertresult[0],2)} with cert ${php:round(spamvertresult[1],2)} for ${spamvertresult/3}
10 |Words scored ${php:round(bayesresult[0],2)} with cert ${php:round(bayesresult[1],2)} for ${bayesresult/3}
11 || # | u | Spams | Cnt | Ham | Hosts | Date | JS | |
|---|---|---|---|---|---|---|---|---|
| 17 | | ✉ | 19 |20 | | 21 | | 22 | | 23 | | 25 | | 26 | |
Load: ${load}. 8 | Last update ping: ${php:round((time()-apc_fetch('update_active'))/60,1)}m.
9 | 10 | 18 | 19 |Sending to account id #${account/id}
12 | -------------------------------------------------------------------------------- /data/bayesignore.txt: -------------------------------------------------------------------------------- 1 | się 2 | mam 3 | mnie 4 | jeszcze 5 | tego 6 | dla 7 | sobie 8 | chyba 9 | może 10 | już 11 | ten 12 | wiem 13 | bardzo 14 | tam 15 | jestem 16 | quote 17 | też 18 | przez 19 | było 20 | nawet 21 | będzie 22 | juz 23 | pod 24 | przy 25 | tej 26 | pozdrawiam 27 | więc 28 | wszystko 29 | trzeba 30 | ich 31 | moze 32 | albo 33 | zawsze 34 | jeśli 35 | gdzie 36 | moge 37 | has 38 | poza 39 | czym 40 | wiec 41 | moje 42 | tyle 43 | nim 44 | bardziej 45 | lub 46 | kto 47 | żeby 48 | temat 49 | bym 50 | oraz 51 | nad 52 | mój 53 | mogę 54 | inne 55 | sama 56 | których 57 | takich 58 | ooh 59 | see 60 | few 61 | extra 62 | stop 63 | było 64 | być 65 | masz 66 | taki 67 | sam 68 | had 69 | she 70 | he 71 | did 72 | code 73 | blog 74 | asp 75 | forum viewtopic 76 | forum topic 77 | com forum 78 | sid 79 | org forum 80 | web 81 | viewtopic 82 | viewtopic php 83 | com pl 84 | shy 85 | google 86 | pl 87 | com web 88 | a href 89 | net http 90 | com http 91 | ale 92 | jak 93 | tak 94 | czy 95 | tym 96 | am 97 | am the 98 | and 99 | bez 100 | biz 101 | blogspot 102 | blogspot com 103 | org url 104 | can 105 | cgi 106 | com 107 | com href 108 | com html 109 | com img 110 | com url 111 | com www 112 | czyli 113 | do 114 | edu 115 | edu href 116 | edu html 117 | edu img 118 | for 119 | forum 120 | from 121 | gmail 122 | gmail com 123 | here 124 | href 125 | href http 126 | htm 127 | yahoo com 128 | yahoo 129 | html 130 | html a 131 | html http 132 | html the 133 | html url 134 | http 135 | http www 136 | img img 137 | in 138 | index 139 | index asp 140 | index htm 141 | index html 142 | index php 143 | info 144 | info href 145 | info html 146 | info img 147 | is 148 | it 149 | jest 150 | me 151 | net 152 | nic 153 | nie 154 | of 155 | org 156 | org href 157 | org html 158 | org http 159 | org 
img 160 | php 161 | php url 162 | sie 163 | some 164 | test 165 | that 166 | the 167 | this 168 | to 169 | tylko 170 | url 171 | url http 172 | url url 173 | us 174 | we 175 | which 176 | with 177 | with the 178 | www 179 | you 180 | your 181 | -------------------------------------------------------------------------------- /tests/surbl.php: -------------------------------------------------------------------------------- 1 | addedhosts = array(); 8 | 9 | $links = $p->getLinks(); 10 | if ($links) foreach($links as $link) 11 | { 12 | if ($host = $link->getHostname()) $this->checkHost($host); 13 | if ($domain = $link->getDomain()) $this->checkHost($domain); 14 | } 15 | 16 | } 17 | 18 | function testPost(ISblamPost $p) 19 | { 20 | return $this->getCheckHostResults(); 21 | } 22 | 23 | protected $addedhosts; 24 | function checkHost($host) 25 | { 26 | // remove number-only subdomains and www. prefix (these are noise) 27 | if (preg_match("!(?:^|\.)(?:www\.)?(?:[0-9]+\.)?((?:[a-z0-9-]+\.)?[a-z0-9-]+\.[a-z]{2,4})$!",$host,$m)) 28 | { 29 | $host = $m[1]; 30 | } 31 | 32 | if (isset($this->addedhosts[$host])) return; 33 | $this->addedhosts[$host] = true; 34 | 35 | SblamURI::gethostbynameasync($host . 
'.multi.surbl.org'); 36 | } 37 | 38 | function getCheckHostResults() 39 | { 40 | $score=0; 41 | foreach($this->addedhosts as $host => $whatever) 42 | { 43 | $host .= '.multi.surbl.org'; 44 | 45 | $res = SblamURI::gethostbynamel($host); 46 | if ($res && count($res)) 47 | { 48 | d($res,"found banned $host"); 49 | $score += 3; 50 | $mask = 0; 51 | foreach($res as $ip) 52 | { 53 | $mask |= ip2long($ip); 54 | } 55 | $mask &= 127 - 1 - 16; // outblaze list has false positives, so lower score 56 | d($mask,"banned mask"); 57 | while($mask) 58 | { 59 | $score++; $mask >>= 1; 60 | } 61 | d("total surbl score until now is $score"); 62 | } else d("$host not listed $res"); 63 | } 64 | 65 | $finalscore = min(0.4 + $score/25, 1.5); 66 | 67 | if ($score) return array($finalscore, ($score >= 13)?self::CERTAINITY_HIGH:self::CERTAINITY_NORMAL,"Linked sites in SURBL (".round($finalscore,1)." = $score)"); 68 | return NULL; 69 | } 70 | 71 | static function info() 72 | { 73 | return array( 74 | 'name'=>'SURBL DNS RBL', 75 | 'desc'=>'Checks for banned hostnames in Spam URI Realtime Blocklists', 76 | 'remote'=>false, 77 | ); 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /admin/test.php: -------------------------------------------------------------------------------- 1 | getSblamBase(); 20 | 21 | $config = Server::getDefaultConfig(); 22 | 23 | $config['throttle']['enabled'] = '0'; // FIXME: this should be handled within plugins 24 | $config['linksleeve']['enabled'] = '0'; 25 | $config['dupes']['enabled'] = '0'; 26 | 27 | $sblam = new Sblam($config, $this->services); 28 | 29 | $num = !empty($_POST['num']) ? 
intval($_POST['num']) : 100; 30 | foreach($this->services->getDB()->query("SELECT id FROM posts_meta WHERE spamscore IS NULL and spamcert IS NULL ORDER BY rand() LIMIT 31 | $num")->fetchAll(PDO::FETCH_ASSOC) as $r) 32 | { 33 | $score = $sblam->testPost($base->getPostById($r['id'])); 34 | 35 | $this->services->getDB()->prepareExecute("UPDATE posts_meta SET spamscore=?,spamcert=? WHERE id=?",array(round($score[0]*100),round($score[1]*100),$r['id'])); 36 | $this->services->getDB()->prepareExecute("UPDATE posts_data SET spamreason=? WHERE id=?",array($score[2],$r['id'])); 37 | } 38 | } 39 | 40 | function id($id) 41 | { 42 | $base = $this->getSblamBase(); 43 | if (!($post = $base->getPostById($id))) throw new Exception("No post $id"); 44 | 45 | $score = $this->test($post); 46 | if ($score) 47 | { 48 | $post->setSpamScore($score); 49 | $post->setSpamReason($score[2]); 50 | } 51 | 52 | return array( 53 | 'title'=>'Tested', 54 | 'score'=>$score, 55 | 'post'=>$post, 56 | ); 57 | } 58 | 59 | protected function test(ISblamPost $post) 60 | { 61 | $sblam = $this->getSblam(); 62 | $score = $sblam->testPost($post); 63 | return $score; 64 | } 65 | 66 | } 67 | 68 | -------------------------------------------------------------------------------- /admin/postformatter.php: -------------------------------------------------------------------------------- 1 | '.htmlspecialchars(html_entity_decode(html_entity_decode($m[0],ENT_QUOTES,'UTF-8'),ENT_QUOTES,'UTF-8')).''; 8 | } 9 | 10 | private static function highlight_text($frag) 11 | { 12 | return nl2br(str_replace("","-",preg_replace_callback('/&#?[a-zA-Z0-9]+;/',array('self','highlight_entity_callback'),htmlspecialchars($frag))));
13 | }
14 |
15 | private static function highlight_inline($frag)
16 | {
17 | $parts = preg_split('!(?:https?://|\bwww\.)([^\]\[\s\(\)<>\"\']+)!ui',$frag,NULL,PREG_SPLIT_DELIM_CAPTURE);
18 | $parts[] = '';
19 | $out = '';
20 | for($i=1; $i < count($parts); $i += 2)
21 | {
22 | $out .= preg_replace('!([^<>&\s/-]{10})([^<>&\s/-]{10})!u','\1\2',self::highlight_text($parts[$i-1]));
23 | if (strlen($parts[$i])) $out .= ''.
25 | preg_replace('!([^<>&\s/-]{10})([^<>&\s/-]{10})!u','\1\2',self::highlight_text(substr(urldecode($parts[$i]),0,100))).' ☠';
26 | }
27 | return $out;
28 | }
29 |
/**
 * Renders a raw post for display: collapses runs of blank lines, splits the
 * text on HTML-like and BBCode-like tags, and passes every fragment (plain
 * text and tag alike) through highlight_inline().
 *
 * @param string $post Raw post body.
 * @return string Concatenated output of highlight_inline() for all fragments.
 */
static function highlight($post)
{
    // Three or more consecutive (possibly whitespace-only) line breaks become one blank line.
    $post = preg_replace("!(?:\s*\r?\n){3,}!u","\n\n",$post);

    // -1 means "no limit"; passing NULL for the limit is deprecated since PHP 8.1.
    $parts = preg_split('!(<[a-z]+[^>]*>|[a-z]+\s*>|\[[a-z]+\s*=[^\]<>]*\]|\[/?[a-z]+\s*\])!ui',$post,-1,PREG_SPLIT_DELIM_CAPTURE);

    // With DELIM_CAPTURE the list alternates text, tag, text, ...; pad it so
    // the trailing text fragment also has a (blank) tag partner.
    $parts[] = '';

    $out = '';
    $total = count($parts);
    for ($i = 1; $i < $total; $i += 2)
    {
        $out .= self::highlight_inline($parts[$i-1]);
        // NOTE(review): the wrapper literals around the tag fragment are empty
        // in this copy of the source; presumably markup was intended - confirm.
        $out .= ''.self::highlight_inline($parts[$i]).'';
    }
    return $out;
}
43 |
/**
 * Escapes a spam-reason string for HTML output and rewrites every
 * "h:<text>)" fragment to "<text>)".
 *
 * @param string $reason Raw reason text.
 * @return string HTML-escaped reason with the fragments rewritten.
 */
static function formatreason($reason)
{
    // Escape exactly once. Escaping twice turned "&" into "&amp;amp;".
    $escaped = htmlspecialchars($reason);

    // The /e (eval) modifier was removed in PHP 7. The evaluated replacement
    // was only a string literal built from the first capture group, so a
    // plain backreference replacement produces the same text.
    return preg_replace('#h:([a-z0-9 \w](?:\([^()]*?\)(?!;)|[^(\)])*)\)#u','\1)',$escaped);
}
49 | }
50 |
51 |
--------------------------------------------------------------------------------
/tests/correctfields.php:
--------------------------------------------------------------------------------
1 | hasURI($p->getAuthorEmail()))
16 | {
17 | $score = 0.2;
18 | if ($this->hasURI($p->getAuthorURI())) $score = 0.4;
19 |
20 | $p->addLink($p->getAuthorEmail()); // expose it!
21 |
22 | $out[] = array($score,self::CERTAINITY_LOW,"Link stuffed in e-mail field");
23 | }
24 |
25 | if ($this->hasURI($p->getAuthorName()))
26 | {
27 | $score = 0.1;
28 | if ($this->hasURI($p->getAuthorURI())) $score = 0.3;
29 |
30 | $p->addLink($p->getAuthorName()); // expose it!
31 |
32 | $out[] = array($score,self::CERTAINITY_LOW,"Link stuffed in name field");
33 | }
34 | else if ("" === $p->getAuthorName())
35 | {
36 | $out[] = array(0.1,self::CERTAINITY_LOW,"Anonymous");
37 | }
38 |
39 | if ($cnt = substr_count($p->getAuthorURI(),"http://") > 1)
40 | {
41 | $out[] = array($cnt/10+0.2,self::CERTAINITY_LOW, "Multiple links in author URI field");
42 | }
43 | if ($cnt = substr_count($p->getAuthorURI()," 1)
44 | {
45 | $out[] = array($cnt/5+0.2,self::CERTAINITY_LOW, "HTML in author URI field");
46 | }
47 |
48 | $longs = 0;
49 | if (strlen($p->getAuthorName()) > 50) $longs++;
50 | if (strlen($p->getAuthorEmail()) > 50) $longs++;
51 | if (strlen($p->getAuthorURI()) > 150) $longs++;
52 |
53 | if ($longs) $out[] = array($longs/10+0.1,self::CERTAINITY_LOW, "Looong text in name/e-mail/URI fields");
54 |
55 |
56 | if ("" === trim($p->getRawContent()))
57 | {
58 | $out[] = array(0.6,self::CERTAINITY_LOW,"Empty content");
59 | }
60 |
61 | if (preg_match('!\b(google\.com|msn\.com)\b!',$p->getAuthorURI()))
62 | {
63 | $out[] = array(0.2,self::CERTAINITY_LOW,"Not your website");
64 | }
65 |
66 | return $out;
67 | }
68 |
69 |
/**
 * Describes this test.
 *
 * @return array Map with the keys 'name', 'desc' and 'remote'.
 */
static function info()
{
    $meta = array();
    $meta['name'] = 'Check if fields are correctly filled-in';
    $meta['desc'] = 'Ensure that post doesn\'t have mistakes that bots would easily make';
    $meta['remote'] = false;

    return $meta;
}
78 | }
79 |
--------------------------------------------------------------------------------
/tests/domains.php:
--------------------------------------------------------------------------------
1 | blacklistfile = empty($settings['chongqed'])?'blacklist.chongqed.txt':$settings['chongqed'];
12 |
13 | if (!class_exists('DomainMatch')) throw new Exception("DomainMatch class required");
14 | }
15 |
/**
 * Reads a blacklist file line by line and adds every entry to $this->blacklist.
 *
 * Each line has an optional leading literal 'https?:\/\/([^\/]*\.)?' fragment
 * and an optional trailing "#comment" removed, is trimmed, and then has its
 * backslash escapes stripped.
 *
 * @param string $filename Path of the blacklist file.
 * @return bool False when the file cannot be read or is empty, true otherwise.
 */
function importChongqed($filename)
{
    $lines = @file($filename);
    if (!$lines) return false;

    // Keeps only the part between the optional regexp prefix and the optional comment.
    $regex = '!^(?:'.preg_quote('https?:\/\/([^\/]*\.)?','!').')?(.*?)(?:#.*)?$!';

    foreach ($lines as $line)
    {
        $entry = stripslashes(trim(preg_replace($regex,'\1',$line)));

        // Blank and comment-only lines carry no pattern; do not register an empty entry.
        if ('' === $entry) continue;

        $this->blacklist->add($entry);
    }
    return true;
}
29 |
/**
 * Scores one link against the blacklist.
 *
 * The blacklist result for the link's hostname has to reach a threshold of
 * max(2, number of dot-separated labels in the link's domain + 1).
 *
 * @param SblamURI $link Link to check.
 * @return int 0 when below the threshold, otherwise result + 1 - threshold.
 */
protected function check(SblamURI $link)
{
    $labels = explode('.', $link->getDomain());
    $threshold = max(2, count($labels) + 1);

    $matched = $this->blacklist->check($link->getHostname());

    return ($matched >= $threshold) ? $matched + 1 - $threshold : 0;
}
41 |
/**
 * Checks the author URI and all links of a post against the blacklist,
 * loading the blacklist file on first use.
 *
 * @param ISblamPost $p Post under test.
 * @return array|null array(score, certainty, reason) when at least one
 *                    blacklisted domain was found, NULL otherwise.
 * @throws Exception When the blacklist file cannot be imported.
 */
function testPost(ISblamPost $p)
{
    if ($this->blacklist === NULL)
    {
        $this->blacklist = new DomainMatch();
        if (!$this->importChongqed($this->blacklistfile))
        {
            // Do not keep an empty blacklist cached: later calls would
            // silently pass every post instead of reporting the failure again.
            $this->blacklist = NULL;
            throw new Exception("Unable to import chongqed.org blacklist from {$this->blacklistfile}");
        }
    }

    $res4 = 0;
    $domains = array();

    if ($uri = $p->getAuthorURI())
    {
        $uri = new SblamURI($uri);
        if ($tmp = $this->check($uri))
        {
            $domains[$uri->getHostname()] = true;
            $res4 += $tmp;
        }
    }

    // Skip the loop when there are no links; foreach over a non-array value would raise a warning.
    $links = $p->getLinks();
    if ($links) foreach ($links as $uri)
    {
        if ($tmp = $this->check($uri))
        {
            $domains[$uri->getHostname()] = true;
            $res4 += $tmp;
        }
    }

    if ($res4) return array(0.8, self::CERTAINITY_NORMAL, "Blacklisted domains (".implode(', ',array_keys($domains)).")");

    return NULL;
}
74 |
/**
 * Describes this test.
 *
 * @return array Map with the keys 'name', 'desc' and 'remote'.
 */
static function info()
{
    $meta = array();
    $meta['name'] = 'Chongqed.org blacklist';
    $meta['desc'] = 'Blacklist used by MediaWiki';
    $meta['remote'] = false;

    return $meta;
}
83 | }
84 |
--------------------------------------------------------------------------------
/admin/proxies.php:
--------------------------------------------------------------------------------
1 | $this->services->getDB()->query("/*maxtime10*/SELECT t.host,count(r.host) as ipcnt FROM trustedproxies t LEFT JOIN dnscache r ON r.host = t.host GROUP BY t.host ORDER BY t.host")->fetchAll(PDO::FETCH_ASSOC),
9 | );
10 | }
11 |
/**
 * Handles the form post: removes a trusted proxy when 'remove' is posted,
 * otherwise adds the host posted as 'add' after checking that it resolves.
 *
 * @return mixed Whatever index() returns.
 * @throws Exception When the host to add does not resolve.
 */
function post_index()
{
    $removeHost = empty($_POST['remove']) ? NULL : $_POST['remove'];
    $addHost = empty($_POST['add']) ? NULL : $_POST['add'];

    if ($removeHost !== NULL)
    {
        $this->services->getDB()->prepareExecute("DELETE FROM trustedproxies WHERE host = ?",array($removeHost));
    }
    elseif ($addHost !== NULL)
    {
        // Called before SblamURI is used, as in lookup().
        $this->getSblam();

        if (!SblamURI::gethostbyname($addHost))
        {
            throw new Exception($addHost.' does not resolve');
        }

        $this->services->getDB()->prepareExecute("INSERT INTO trustedproxies(host) VALUES(?)",array($addHost));
        // Clear the submitted value once it has been stored.
        $_POST['add'] = '';
    }

    return $this->index();
}
30 |
/**
 * Resolves every given host and dumps the result through d().
 *
 * @param array $hosts Rows that each carry a 'host' key.
 */
private function lookup(array $hosts)
{
    $this->getSblam(); // init SblamURI

    foreach ($hosts as $row)
    {
        $name = $row['host'];
        d(SblamURI::gethostbyname($name), $name);
    }
}
39 |
/**
 * Refreshes DNS cache entries for trusted proxies.
 *
 * The posted 'type' selects the action: 'insecure' runs an INSERT ... SELECT
 * on dnscache, 'missing' looks up only the proxies without a dnscache row,
 * and anything else (including no value) looks up all trusted proxies.
 *
 * The method currently ends the request with die(), so the redirect array
 * below it is never returned.
 */
function post_cache()
{
    // A missing field falls through to the default branch without an undefined-index warning.
    $type = isset($_POST['type']) ? $_POST['type'] : '';
    $db = $this->services->getDB();

    switch ($type)
    {
        case 'insecure':
            $db->exec("/*maxtime30*/INSERT INTO dnscache (host,ip) SELECT t.host,r.ip FROM trustedproxies t LEFT JOIN dnscache d ON d.host = t.host INNER JOIN dnscache r ON t.host = r.host WHERE d.host IS NULL");
            break;
        case 'missing':
            $this->lookup($db->query("/*maxtime20*/SELECT t.host FROM trustedproxies t LEFT JOIN dnscache r ON t.host = r.host WHERE r.host IS NULL")->fetchAll(PDO::FETCH_ASSOC));
            break;
        default:
            $this->lookup($db->query("/*maxtime20*/SELECT t.host FROM trustedproxies t")->fetchAll(PDO::FETCH_ASSOC));
            break;
    }

    die();
    // NOTE(review): unreachable while the die() above is in place; kept so the
    // intended redirect is not lost if the die() is removed.
    return array('redirect'=>'proxies');
}
57 | }
58 |
--------------------------------------------------------------------------------
/tests/networks.php:
--------------------------------------------------------------------------------
1 | whitelist = trim(preg_replace('/[^a-z]+/','|',trim($whitelist)),"|");
17 | $this->blacklist = trim(preg_replace('/[^a-z]+/','|',trim($blacklist)),"|");
18 | $this->isps = trim(preg_replace('/[^a-z0-9.]+/','|',trim($isps)),"|");
19 | }
20 |
/**
 * Starts an asynchronous reverse lookup for every author IP of the post.
 *
 * @param ISblamPost $p Post about to be tested.
 */
function preTestPost(ISblamPost $p)
{
    $addresses = $p->getAuthorIPs();

    foreach ($addresses as $address)
    {
        SblamURI::gethostbyaddrasync($address);
    }
}
28 |
/**
 * Scores a post by the reverse DNS names of its author IPs.
 *
 * Every IP is checked against the blacklisted ISPs and the blacklisted TLDs.
 * Only the first IP may be whitelisted, because the other addresses can be
 * forged, and the whitelist counts only when nothing objectionable was found.
 *
 * @param ISblamPost $p Post under test.
 * @return array|null List of array(score, certainty, reason) for blacklist
 *                    hits, a single such array for a whitelist hit, or NULL.
 */
function testPost(ISblamPost $p)
{
    $out = array();
    $firstIP = true;
    $whitelistedHost = NULL;

    foreach ($p->getAuthorIPs() as $ip)
    {
        // Clear the flag before any early exit: if the first address has no
        // reverse DNS, the next (forgeable) address must not be treated as
        // first. Assumes the first entry is the direct connection - TODO confirm.
        $isFirst = $firstIP;
        $firstIP = false;

        $rev = SblamURI::gethostbyaddr($ip);
        if (!$rev) continue;
        if (is_array($rev))
        {
            warn($rev,'gethostbyaddr returned array');
            $rev = reset($rev);
            if (!$rev) continue;
        }

        if (preg_match('!(?:\.|^)(?:'.$this->isps.')$!',$rev))
        {
            $out[] = array(0.5, self::CERTAINITY_LOW, "Sent from blacklisted ISP ($rev)");
        }
        else if ($isFirst && preg_match('!\.(?:'.$this->whitelist.')$!',$rev))
        {
            // Remember the host that matched, so the reason names it rather
            // than whichever host was resolved last.
            $whitelistedHost = $rev;
        }
        else if (preg_match('!\.(?:'.$this->blacklist.')$!',$rev))
        {
            $out[] = array(0.35, self::CERTAINITY_LOW, "Sent from blacklisted TLD ($rev)");
        }
    }

    if (count($out)) return $out;
    if ($whitelistedHost !== NULL) return array(-0.25, self::CERTAINITY_LOW, "Sent from whitelisted TLD ($whitelistedHost)");

    return NULL;
}
53 |
/**
 * Describes this test.
 *
 * @return array Map with the keys 'name', 'desc' and 'remote'.
 */
static function info()
{
    $meta = array();
    $meta['name'] = 'Sender\'s network (country/ISP)';
    $meta['desc'] = 'Marks posts sent from suspicious networks, suspicious.';
    $meta['remote'] = false;

    return $meta;
}
62 | }
63 |
--------------------------------------------------------------------------------
/tests/keywords.php:
--------------------------------------------------------------------------------
1 | blocklist = !empty($settings['blocklist2']) ? $settings['blocklist2'] : 'data/blocklist.txt';
12 | }
13 |
/**
 * Loads keywords from a blocklist file into $this->keywords.
 *
 * Only lines starting with "block:" are used. The rest of such a line is
 * trimmed, lowercased, and every run of characters outside a-z, 0-9, ".",
 * "_" and "-" is replaced by a single space.
 *
 * @param string $blocklist2file Path of the blocklist file.
 * @return bool False when the file cannot be read or is empty, true otherwise.
 */
function importBlocklist2($blocklist2file)
{
    // Make the keyword list countable even when nothing gets loaded; testPost()
    // calls count() on it. Keywords from earlier imports are kept.
    if ($this->keywords === NULL) $this->keywords = array();

    $file = @file_get_contents($blocklist2file);
    if (!$file) return false;

    foreach (explode("\n",$file) as $line)
    {
        if ('block:' !== substr($line,0,6)) continue;

        $keyword = preg_replace('![^a-z0-9._-]+!',' ',strtolower(trim(substr($line,6))));

        // A bare "block:" line has no keyword to store.
        if ('' === trim($keyword)) continue;

        $this->keywords[] = $keyword;
    }
    return true;
}
26 |
/**
 * Counts the words of a text that are on the keyword list.
 *
 * @param string $text Text to scan.
 * @return int Number of entries in the intersection of the text's words and $this->keywords.
 */
private function testText($text)
{
    $words = $this->getKeywordsFromText($text);
    $hits = array_intersect($words, $this->keywords);

    return count($hits);
}
31 |
/**
 * Splits a text into lowercase words (crappy, us-ascii only).
 *
 * NOTE(review): str_word_count() without a character list appears to break
 * words at digits, "." and "_", while the clean-up regexp keeps those
 * characters - keywords containing them may never match here; confirm.
 *
 * @param string $x Text to split.
 * @return array List of words as returned by str_word_count() in format 1.
 */
private function getKeywordsFromText($x)
{
    $lowered = strtolower($x);
    $cleaned = preg_replace('![^a-z0-9._-]+!',' ',$lowered);

    return str_word_count($cleaned,1);
}
37 |
/**
 * Scores a post by the number of banned keywords found in its text and
 * author name, in its link labels, and in its URLs (author URI, author
 * e-mail and link URIs).
 *
 * @param ISblamPost $p Post under test.
 * @return array|null List of array(score, certainty, reason) entries, or
 *                    NULL when there are no keywords or no hits.
 */
function testPost(ISblamPost $p)
{
    if ($this->keywords === NULL)
    {
        $this->importBlocklist2($this->blocklist);
    }
    // empty() also covers a list that is still NULL after a failed import;
    // count(NULL) raises a TypeError on PHP 8.
    if (empty($this->keywords)) return NULL;

    $res1 = $this->testText($p->getText().' '.$p->getAuthorName());
    $res2 = 0;
    $res3 = 0;

    $alluris = '';
    if ($uri = $p->getAuthorURI()) $alluris .= strtolower($uri);
    if ($uri = $p->getAuthorEmail()) $alluris .= ' '.strtolower($uri);

    // Skip the loop when there are no links; foreach over a non-array value would raise a warning.
    $links = $p->getLinks();
    if ($links) foreach ($links as $link)
    {
        if ($label = $link->getLabel()) $res2 += $this->testText($label);
        if ($uri = $link->getURI()) $alluris .= ' '.strtolower($uri);
    }

    // Replacing every keyword with itself leaves the text unchanged; the call
    // is made only for its replacement counter, i.e. the number of keyword
    // occurrences in the collected URLs.
    str_replace($this->keywords,$this->keywords,$alluris,$res3);

    $sum = $res1+$res2+$res3;
    if (!$sum) return NULL;

    $certainty = ($sum > 2) ? self::CERTAINITY_HIGH : self::CERTAINITY_NORMAL;

    $out = array();
    if ($res1) $out[] = array(1.2-1/($res1), $certainty, "Banned keywords in text ($res1)");
    if ($res2) $out[] = array(1.2-1/($res2+1), self::CERTAINITY_HIGH, "Banned keywords in link labels ($res2)");
    if ($res3) $out[] = array(1.2-1/($res3), $certainty, "Banned keywords in URLs ($res3)");

    if (count($out)) return $out;
    return NULL;
}
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/admin/tpl/layout.inc:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Database is built from ${php:total['ham'] + total['spam']} posts (${totalwordsf} words), of which ${totalspamf} (${php:round(100* total['spam']/(total['ham']+total['spam']),1)}%) are spam
19 | 20 || Empty | ||
| Rating | 27 |Word | Popular | 28 |
|---|---|---|
| ${php:round(abs(100*word['rate']))}% | 32 |33 | ↓ 34 | ${word/word | php:'[' . urlencode(word['wordh']) . ']'} 35 | | 36 |${php:round((word['spammy'] + word['hammy']) /3 ,2)}% | 37 |
| ID | date | IP | score | URL | time | + | Reason |
|---|
Show statistics of ${total} ips.
16 |There are ${total} IPs from about ${totalnets} subnets in the database.
17 | 18 | 19 | 23 | 24 | 25 | 26 | 29 | 30 | 34 | 35 |