├── LICENSE.md ├── README.md ├── composer.json └── src ├── Bootstrap.php ├── HashMap.php ├── Install.php ├── Sensitive.php ├── SensitiveException.php ├── config ├── SensitiveWord.txt └── plugin │ └── isszz │ └── webman-sensitive │ ├── app.php │ └── bootstrap.php ├── facade └── Sensitive.php ├── helpers.php └── word └── SensitiveWord.txt /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) isszz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # webman-sensitive 2 | Webman 敏感词检测,过滤,标记 3 | 4 |

5 | Minimum PHP Version 6 | Minimum Webman Version 7 | Stable Version 8 | Total Downloads 9 | License 10 |

11 | 12 | ## 安装 13 | 14 | ```shell 15 | composer require isszz/webman-sensitive 16 | ``` 17 | 18 | ## 配置 19 | ```php 20 | return [ 21 | 'enable' => true, 22 | 23 | // 支持file,array,也可以指向自己敏感词库文件路径 24 | // file模式时,敏感词库位于webman根目录的config/plugin/isszz/webman-sensitive/SensitiveWord.txt,也可以指向自定义的词库文件路径 25 | 'mode' => 'file', 26 | 27 | 'config' => [ 28 | 'repeat' => true, // 重复替换为敏感词相同长度的字符 29 | 'replace_char' => '*', // 替换字符 30 | // 标记敏感词,标签生成敏感词 31 | 'mark' => 'mark', 32 | ], 33 | 34 | // 干扰因子 35 | 'interference_factors' => [ 36 | ' ', '&', '*', '/', '|', '@', '.', '^', '~', '$', 37 | ], 38 | 39 | // 数组模式敏感词 40 | 'sensitive_words' => [ 41 | '工口', 42 | '里番', 43 | '性感美女', 44 | ] 45 | ]; 46 | 47 | ``` 48 | 49 | ## 使用 50 | 51 | facade方式 52 | ```php 53 | use isszz\sensitive\facade\Sensitive; 54 | 55 | class Index 56 | { 57 | public function add() 58 | { 59 | // 设置干扰因子 60 | Sensitive::interferenceFactor(['(', ')', ',', ',', ';', ';', '。']); 61 | 62 | // 添加一个额外的敏感词,words参数支持单敏感词,多词也可以用|分割,或者直接传入多个敏感词数组 63 | // words = 性感美女|分隔符 64 | // words = ['性感美女', '数组'] 65 | Sensitive::add(words: '性感美女'); 66 | 67 | // 删除的敏感词,words参数同添加的格式一样 68 | // 第二个参数once为true时,只针对当次: is,replace,mark,操作生效 69 | Sensitive::remove(words: '性感美女', once: true); 70 | 71 | // 检测 72 | if (Sensitive::is(content: '检测语句')) { 73 | return json(['code' => 1, 'msg' => '输入内容包含敏感词,请注意用词。']); 74 | } 75 | 76 | // 替换 77 | $replaced = Sensitive::add(words: '垃圾')->replace(content: '替换语句垃圾要被替换', replaceChar: '*', repeat: false); 78 | 79 | // 标记敏感词 80 | $marked = Sensitive::add(words: '尼玛')->mark(content: '标记的内容,这里尼玛要被标记', tag: 'bad'); 81 | 82 | // 提取内容中的所有敏感词 83 | $badWords = Sensitive::add('狗逼')->get('提取内容中的所有敏感词,狗逼,还有SB都会被提取'); 84 | 85 | // 自定义敏感词库 86 | // 文件方式 87 | Sensitive::custom('/config/SensitiveWord.txt') 88 | ->is('检测尼玛的语句'); 89 | 90 | // 数组方式 91 | Sensitive::custom([ 92 | '垃圾', '尼玛', 93 | //... 
94 | ])->is('检测尼玛的语句'); 95 | 96 | // 文件词库模式,可以添加新敏感词到词库文件 97 | // data参数可以是一个数组也可以是用|分割敏感词的字符串 98 | // append参数为true时是追加模式,false时先提取词库,再去重,然后合并写入 99 | Sensitive::addWordToFile(data: '狗逼|傻缺', append: false); 100 | } 101 | } 102 | 103 | ``` 104 | 依赖注入方式 105 | ```php 106 | use isszz\sensitive\Sensitive; 107 | 108 | class Index 109 | { 110 | public function add(Sensitive $sensitive) 111 | { 112 | // 设置干扰因子 113 | $sensitive->interferenceFactor(['(', ')', ',', ',', ';', ';', '。']); 114 | // ... 115 | } 116 | } 117 | ``` 118 | 助手函数方式 119 | ```php 120 | class Index 121 | { 122 | public function add(Sensitive $sensitive) 123 | { 124 | // 设置干扰因子,后返回的Sensitive实例可使用:is,replace,mark 125 | sensitive_interference_factor(['(', ')', ',', ',', ';', ';', '。']) 126 | ->is('检测语句尼玛'); 127 | 128 | // 添加敏感词,后返回的Sensitive实例可使用:is,replace,mark 129 | sensitive_add(words: '性感美女') 130 | ->mark('你是一个性感美女,你说是不是?'); 131 | 132 | // 移除敏感词,后返回的Sensitive实例可使用:is,replace,mark 133 | // 第二个参数once为true时,只针对当次: is,replace,mark,操作生效 134 | sensitive_remove(words: '工口', once: true) 135 | ->mark('你这个SB是不是想看工口类的动漫?哈哈!'); 136 | 137 | // 检测敏感词 138 | if (sensitive_is('检测语句尼玛')) { 139 | return json(['code' => 1, 'msg' => '输入内容包含敏感词,请注意用词。']); 140 | } 141 | 142 | // replaceChar是用来替换敏感词的字符 143 | // repeat为true时根据检测出的敏感词长度重复replaceChar 144 | $replaced = sensitive_replace(content: '替换语句垃圾要被替换', replaceChar: '*', repeat: true); 145 | // tag参数是用来设置包裹敏感词的标签名,例如: 这里<bad>SB</bad>要被标记 146 | $marked = sensitive_mark(content: '标记的内容,这里SB要被标记', tag: 'bad'); 147 | 148 | // 提取内容中的所有敏感词 149 | $badWords = sensitive_get('谁是SB,谁是狗逼,谁是傻缺'); 150 | 151 | // 自定义敏感词库 152 | // 文件方式 153 | sensitive_custom('/config/SensitiveWord.txt') 154 | ->is('检测尼玛的语句'); 155 | 156 | // 数组方式 157 | sensitive_custom([ 158 | '垃圾', '尼玛', 159 | //... 
160 | ])->is('检测尼玛的语句'); 161 | 162 | // 文件词库模式,可以添加新敏感词到词库文件 163 | // data参数可以是一个数组也可以是用|分割敏感词的字符串 164 | // append参数为true是追加模式,false时先提取词库,再去重,然后合并写入 165 | sensitive_add_word_to_file(data: '狗逼|傻缺', append: false); 166 | 167 | } 168 | } 169 | ``` -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "isszz/webman-sensitive", 3 | "description": "Webman 敏感词检测,过滤,标记", 4 | "license": "MIT", 5 | "authors": [ 6 | { 7 | "name": "isszz", 8 | "email": "isszz@qq.com" 9 | } 10 | ], 11 | "require": { 12 | "php": ">=8.0.0", 13 | "workerman/webman-framework": ">=1.4" 14 | }, 15 | "autoload": { 16 | "files": [ 17 | "src/helpers.php" 18 | ], 19 | "psr-4": { 20 | "isszz\\sensitive\\": "src" 21 | } 22 | }, 23 | "config": { 24 | "sort-packages": true 25 | }, 26 | "minimum-stability": "dev", 27 | "extra": { 28 | "branch-alias": { 29 | "dev-master": "dev-dev" 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/Bootstrap.php: -------------------------------------------------------------------------------- 1 | {$name}(... 
$arguments); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/HashMap.php: -------------------------------------------------------------------------------- 1 | containsKey($key)) { 25 | $this->hashTable[$key] = $value; 26 | return null; 27 | } 28 | 29 | $_temp = $this->hashTable[$key]; 30 | $this->hashTable[$key] = $value; 31 | 32 | return $_temp; 33 | } 34 | 35 | /** 36 | * 根据key获取对应的value 37 | * 38 | * @param $key 39 | * @return mixed|null 40 | */ 41 | public function get($key) 42 | { 43 | if ($this->containsKey($key)) { 44 | return $this->hashTable[$key]; 45 | } 46 | 47 | return null; 48 | } 49 | 50 | /** 51 | * 删除指定key的键值对 52 | * 53 | * @param $key 54 | * @return mixed|null 55 | */ 56 | public function remove($key) 57 | { 58 | if ($this->containsKey($key)) { 59 | $tempTable = []; 60 | $tempValue = $this->hashTable[$key]; 61 | foreach ($this->hashTable as $k => $v) { 62 | if ($k !== $key) { 63 | $tempTable[$k] = $v; 64 | } 65 | } 66 | $this->hashTable = $tempTable; 67 | return $tempValue; 68 | } 69 | 70 | return null; 71 | } 72 | 73 | /** 74 | * 获取HashMap的所有键值 75 | * 76 | * @return array 77 | */ 78 | public function keys() 79 | { 80 | return array_keys($this->hashTable); 81 | } 82 | 83 | /** 84 | * 获取HashMap的所有value值 85 | * 86 | * @return array 87 | */ 88 | public function values() 89 | { 90 | return array_values($this->hashTable); 91 | } 92 | 93 | /** 94 | * 将一个HashMap的值全部put到当前HashMap中 95 | * 96 | * @param $map 97 | */ 98 | public function putAll($map) 99 | { 100 | if (!$map->isEmpty() && $map->size() > 0) { 101 | $keys = $map->keys(); 102 | foreach ($keys as $key) { 103 | $this->put($key, $map->get($key)); 104 | } 105 | } 106 | 107 | return ; 108 | } 109 | 110 | /** 111 | * 移除HashMap中所有元素 112 | * 113 | * @return bool 114 | */ 115 | public function removeAll() 116 | { 117 | $this->hashTable = null; 118 | return true; 119 | } 120 | 121 | /** 122 | * 判断HashMap中是否包含指定的值 123 | * 124 | * @param $value 125 | * 
@return bool 126 | */ 127 | public function containsValue($value) 128 | { 129 | while ($curValue = current($this->hashTable)) { 130 | if ($curValue == $value) { 131 | return true; 132 | } 133 | 134 | next($this->hashTable); 135 | } 136 | 137 | return false; 138 | } 139 | 140 | /** 141 | * 判断HashMap中是否包含指定的键key 142 | * 143 | * @param $key 144 | * @return bool 145 | */ 146 | public function containsKey($key) 147 | { 148 | return array_key_exists($key, $this->hashTable); 149 | } 150 | 151 | /** 152 | * 获取HashMap中元素个数 153 | * 154 | * @return int 155 | */ 156 | public function size() 157 | { 158 | return count($this->hashTable); 159 | } 160 | 161 | /** 162 | * 判断HashMap是否为空 163 | * 164 | * @return bool 165 | */ 166 | public function isEmpty() 167 | { 168 | return (count($this->hashTable) == 0); 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/Install.php: -------------------------------------------------------------------------------- 1 | 'config/plugin/isszz/webman-sensitive', 15 | ]; 16 | 17 | /** 18 | * Install 19 | * @return void 20 | */ 21 | public static function install() 22 | { 23 | static::installByRelation(); 24 | } 25 | 26 | /** 27 | * Uninstall 28 | * @return void 29 | */ 30 | public static function uninstall() 31 | { 32 | self::uninstallByRelation(); 33 | } 34 | 35 | /** 36 | * installByRelation 37 | * @return void 38 | */ 39 | public static function installByRelation() 40 | { 41 | foreach (static::$pathRelation as $source => $dest) { 42 | if ($pos = strrpos($dest, '/')) { 43 | $parent_dir = base_path().'/'.substr($dest, 0, $pos); 44 | if (!is_dir($parent_dir)) { 45 | mkdir($parent_dir, 0777, true); 46 | } 47 | } 48 | copy_dir(__DIR__ . 
"/$source", base_path()."/$dest"); 49 | echo "Create $dest"; 50 | } 51 | } 52 | 53 | /** 54 | * uninstallByRelation 55 | * @return void 56 | */ 57 | public static function uninstallByRelation() 58 | { 59 | foreach (static::$pathRelation as $source => $dest) { 60 | $path = base_path()."/$dest"; 61 | if (!is_dir($path) && !is_file($path)) { 62 | continue; 63 | } 64 | echo "Remove $dest"; 65 | if (is_file($path) || is_link($path)) { 66 | unlink($path); 67 | continue; 68 | } 69 | remove_dir($path); 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/Sensitive.php: -------------------------------------------------------------------------------- 1 | config = $config['config']; 64 | 65 | // 配置中的干扰因子 66 | $this->interferenceFactors = $config['interference_factors'] ?? []; 67 | 68 | if ($mode == 'array') { 69 | $this->setTree($config['sensitive_words'] ?? []); 70 | } else { 71 | $this->setFile( 72 | is_file($mode) ? $mode : config_path('plugin') . DIRECTORY_SEPARATOR .'isszz'. DIRECTORY_SEPARATOR .'webman-sensitive'. 
DIRECTORY_SEPARATOR .'SensitiveWord.txt' 73 | ); 74 | } 75 | } 76 | 77 | /** 78 | * 被检测内容是否合法|简写 79 | * 80 | * @param $content 81 | * 82 | * @return bool 83 | * @throws \isszz\sensitive\SensitiveException 84 | */ 85 | public function is(string $content) 86 | { 87 | return $this->check($content); 88 | } 89 | 90 | /** 91 | * 被检测内容是否合法 92 | * 93 | * @param $content 94 | * 95 | * @return bool 96 | * @throws \isszz\sensitive\SensitiveException 97 | */ 98 | public function check(string $content) 99 | { 100 | $this->contentLength = sensitive_mb_strlen($content, 'utf-8'); 101 | 102 | for ($length = 0; $length < $this->contentLength; $length++) { 103 | $matchFlag = 0; 104 | $tempMap = $this->wordTree; 105 | for ($i = $length; $i < $this->contentLength; $i++) { 106 | $keyChar = mb_substr($content, $i, 1, 'utf-8'); 107 | 108 | // 检测干扰因子 109 | if ($this->checkInterferenceFactor($keyChar)) { 110 | $matchFlag++; 111 | continue; 112 | } 113 | 114 | // 获取指定节点树 115 | $nowMap = $tempMap->get($keyChar); 116 | 117 | // 不存在节点树,直接返回 118 | if (empty($nowMap)) { 119 | break; 120 | } 121 | 122 | // 找到相应key,偏移量+1 123 | $tempMap = $nowMap; 124 | $matchFlag++; 125 | 126 | // 如果为最后一个匹配规则,结束循环,返回匹配标识数 127 | if ($nowMap->get('isEnd') === false) { 128 | continue; 129 | } 130 | 131 | return true; 132 | } 133 | 134 | // 找到相应key 135 | if ($matchFlag <= 0) { 136 | continue; 137 | } 138 | 139 | // 需匹配内容标志位往后移 140 | $length += $matchFlag - 1; 141 | } 142 | 143 | $this->recoverRemove(); 144 | return false; 145 | } 146 | 147 | /** 148 | * 替换敏感字字符 149 | * 150 | * @param string $content 文本内容 151 | * @param string $replaceChar 替换字符 152 | * @param bool $repeat 重复替换为敏感词相同长度的字符 153 | * @param int $matchType 匹配类型,默认为最小匹配规则 154 | * 155 | * @return mixed 156 | * @throws \isszz\sensitive\SensitiveException 157 | */ 158 | public function replace(string $content, string $replaceChar = '', bool $repeat = false, $matchType = 1) 159 | { 160 | if (empty($content)) { 161 | throw new SensitiveException('Please fill in the 
content of the test', 1); 162 | } 163 | 164 | $replaceChar = $replaceChar ?: $this->config['replace_char']; 165 | 166 | if(!$repeat) { 167 | $repeat = $this->config['repeat'] ?? false; 168 | } 169 | 170 | $badWordList = $this->badWordList ? $this->badWordList : $this->get($content, $matchType); 171 | 172 | // 未检测到敏感词,直接返回 173 | if (empty($badWordList)) { 174 | return $content; 175 | } 176 | 177 | foreach ($badWordList as $badWord) { 178 | $hasReplacedChar = $replaceChar; 179 | // $badWord = $this->ltrimInterferenceFactorBadWord($badWord); 180 | if ($repeat) { 181 | $hasReplacedChar = $this->dfaBadWordConversChars($badWord, $replaceChar); 182 | } 183 | 184 | $content = str_replace($badWord, $hasReplacedChar, $content); 185 | } 186 | 187 | return $content; 188 | } 189 | 190 | /** 191 | * 标记敏感词 192 | * 193 | * @param string $content 文本内容 194 | * @param string $tag 标签开头,如mark 195 | * @param int $matchType 匹配类型,默认为最小匹配规则 196 | * 197 | * @return mixed 198 | * @throws \isszz\sensitive\SensitiveException 199 | */ 200 | public function mark(string $content, string $tag = '', $matchType = 1) 201 | { 202 | if (empty($content)) { 203 | throw new SensitiveException('Please fill in the content of the test', 1); 204 | } 205 | 206 | if(!$tag) { 207 | $tag = $this->config['mark'] ?? 'mark'; 208 | } 209 | 210 | $sTag = '<'. $tag .'>'; 211 | $eTag = ''; 212 | 213 | $badWordList = $this->badWordList ? $this->badWordList : $this->get($content, $matchType); 214 | 215 | // 未检测到敏感词,直接返回 216 | if (empty($badWordList)) { 217 | return $content; 218 | } 219 | 220 | $badWordList = array_unique($badWordList); 221 | 222 | foreach ($badWordList as $badWord) { 223 | // $badWord = $this->ltrimInterferenceFactorBadWord($badWord); 224 | $replaceChar = $sTag . $badWord . 
$eTag; 225 | $content = str_replace($badWord, $replaceChar, $content); 226 | } 227 | 228 | return $content; 229 | } 230 | 231 | /** 232 | * 检测文字中的敏感词 233 | * 234 | * @param string $content 待检测内容 235 | * @param int $matchType 匹配类型,默认为最小匹配规则 236 | * @param int $wordNum 需要获取的敏感词数量,默认获取全部 237 | * 238 | * @return array 239 | * @throws \isszz\sensitive\SensitiveException 240 | */ 241 | public function get(string $content, $matchType = 1, $wordNum = 0) 242 | { 243 | $this->contentLength = sensitive_mb_strlen($content, 'utf-8'); 244 | $badWordList = []; 245 | 246 | for ($length = 0; $length < $this->contentLength; $length++) { 247 | $matchFlag = 0; 248 | $flag = false; 249 | $tempMap = $this->wordTree; 250 | 251 | for ($i = $length; $i < $this->contentLength; $i++) { 252 | $keyChar = mb_substr($content, $i, 1, 'utf-8'); 253 | 254 | // 检测干扰因子 255 | if ($this->checkInterferenceFactor($keyChar)) { 256 | $matchFlag++; 257 | continue; 258 | } 259 | 260 | // 获取指定节点树 261 | $nowMap = $tempMap->get($keyChar); 262 | 263 | // 不存在节点树,直接返回 264 | if (empty($nowMap)) { 265 | break; 266 | } 267 | 268 | // 存在,则判断是否为最后一个 269 | $tempMap = $nowMap; 270 | 271 | // 找到相应key,偏移量+1 272 | $matchFlag++; 273 | 274 | // 如果为最后一个匹配规则,结束循环,返回匹配标识数 275 | if ($nowMap->get('isEnd') === false) { 276 | continue; 277 | } 278 | 279 | $flag = true; 280 | 281 | // 最小规则,直接退出 282 | if ($matchType === 1) { 283 | break; 284 | } 285 | } 286 | 287 | if (!$flag) { 288 | $matchFlag = 0; 289 | } 290 | 291 | if ($matchFlag > 0) { 292 | $badWordList[] = $this->ltrimInterferenceFactorBadWord(mb_substr($content, $length, $matchFlag, 'utf-8')); 293 | 294 | // 有返回数量限制 295 | if ($wordNum > 0 && count($badWordList) == $wordNum) { 296 | return $badWordList; 297 | } 298 | 299 | // 需匹配内容标志位往后移 300 | $length += $matchFlag - 1; 301 | } 302 | } 303 | 304 | $this->recoverRemove(); 305 | 306 | return $badWordList; 307 | } 308 | 309 | /** 310 | * 添加额外的敏感词 311 | * 312 | * @param string|array $words 313 | * 314 | * @return $this 315 | */ 
316 | public function add(string|array $words) 317 | { 318 | if(!$this->wordTree) { 319 | throw new SensitiveException('Please initialize Sensitive first', 6); 320 | } 321 | 322 | if (is_string($words) && str_contains($words, '|')) { 323 | $words = explode('|', $words); 324 | } 325 | 326 | foreach ((array) $words as $word) { 327 | $this->buildWordToTree($word); 328 | } 329 | 330 | return $this; 331 | } 332 | 333 | /** 334 | * 删除敏感词 335 | * 336 | * @param string|array $words 337 | * @param bool $once 338 | * 339 | * @return $this 340 | */ 341 | public function remove(string|array $words, bool $once = false) 342 | { 343 | if(!$this->wordTree) { 344 | throw new SensitiveException('Please initialize Sensitive first', 6); 345 | } 346 | 347 | if (is_string($words) && str_contains($words, '|')) { 348 | $words = explode('|', $words); 349 | } 350 | 351 | foreach ((array) $words as $word) { 352 | $this->removeToTree($word, $once); 353 | } 354 | 355 | return $this; 356 | } 357 | 358 | /** 359 | * 从敏感词树删除 360 | * 361 | * @param string|array $words 362 | * @param bool $once 363 | * 364 | * @return $this 365 | */ 366 | public function removeToTree(string $word, bool $once = false) 367 | { 368 | for ($i = 0; $i < sensitive_mb_strlen($word, 'utf-8'); $i++) { 369 | $this->wordTree->remove(mb_substr($word, $i, 1, 'utf-8')); 370 | } 371 | 372 | // 放入待恢复 373 | $once === true && $this->removeList[] = $word; 374 | } 375 | 376 | /** 377 | * 恢复删除的敏感词 378 | * 379 | * @return mixed 380 | */ 381 | public function recoverRemove() 382 | { 383 | if (!$this->removeList) { 384 | return false; 385 | } 386 | 387 | $this->add($this->removeList); 388 | $this->removeList = []; 389 | 390 | return true; 391 | } 392 | 393 | /** 394 | * 自定义构建敏感词树,文件方式|数组方式 395 | * 396 | * @param string|array $custom 397 | * 398 | * @return $this 399 | * @throws \isszz\sensitive\SensitiveException 400 | */ 401 | public function custom(string|array $custom) 402 | { 403 | if (is_string($custom)) { 404 | 
$this->setFile($custom); 405 | } 406 | 407 | if (is_array($custom)) { 408 | $this->setTree($custom); 409 | } 410 | 411 | return $this; 412 | } 413 | 414 | /** 415 | * 构建敏感词树,文件方式 416 | * 417 | * @param string $file 418 | * 419 | * @return $this 420 | * @throws \isszz\sensitive\SensitiveException 421 | */ 422 | public function setFile(string $file) 423 | { 424 | if (!is_file($file)) { 425 | throw new SensitiveException('The sensitive words file does not exist', 3); 426 | } 427 | 428 | $file = $this->getSensitiveWordPath($file); 429 | 430 | $this->wordTree = $this->wordTree ?: new HashMap; 431 | 432 | foreach ($this->yieldToReadFile($file) as $words) { 433 | $this->buildWordToTree(trim($words)); 434 | } 435 | 436 | return $this; 437 | } 438 | 439 | /** 440 | * 构建敏感词树,数组方式 441 | * 442 | * @param array|null $sensitiveWords 443 | * 444 | * @return $this 445 | * @throws \isszz\sensitive\SensitiveException 446 | */ 447 | public function setTree(array|null $sensitiveWords = null) 448 | { 449 | if (empty($sensitiveWords)) { 450 | throw new SensitiveException('The sensitive words cannot be empty', 2); 451 | } 452 | 453 | $this->wordTree = $this->wordTree ?: new HashMap; 454 | 455 | foreach ($sensitiveWords as $word) { 456 | $this->buildWordToTree(trim($word)); 457 | } 458 | 459 | return $this; 460 | } 461 | 462 | /** 463 | * 添加干扰因子 464 | * 465 | * @param array $interferenceFactors 466 | * 467 | * @return $this 468 | */ 469 | public function interferenceFactor(array $interferenceFactors) 470 | { 471 | $this->interferenceFactors = array_unique(array_merge($this->interferenceFactors, $interferenceFactors)); 472 | 473 | return $this; 474 | } 475 | 476 | /** 477 | * 删除敏感词前的干扰因子 478 | * 479 | * @param string $word 需要处理的敏感词 480 | * 481 | * @return string 482 | */ 483 | public function ltrimInterferenceFactorBadWord(string $word) 484 | { 485 | $characters = ''; 486 | foreach($this->interferenceFactors as $interferenceFactor) { 487 | $characters .= $interferenceFactor. '\\' .' '. 
$interferenceFactor; 488 | } 489 | 490 | return ltrim($word, $characters); 491 | } 492 | 493 | /** 494 | * 向敏感词库文件添加新词 495 | * 496 | * @param string|array $data 添加的新敏感词 497 | * @param bool $append 是否追加模式,false时会提取后合并去掉重复再写入 498 | * 499 | * @return string 500 | */ 501 | public function addWordToFile(string|array $data, bool $append = true) 502 | { 503 | $mode = config('plugin.isszz.webman-sensitive.app.mode', 'file'); 504 | 505 | if ($mode == 'array') { 506 | throw new SensitiveException('Array mode cannot be added', 8); 507 | } 508 | 509 | $file = is_file($mode) ? $mode : config_path('plugin') . DIRECTORY_SEPARATOR .'isszz'. DIRECTORY_SEPARATOR .'webman-sensitive'. DIRECTORY_SEPARATOR .'SensitiveWord.txt'; 510 | 511 | if (!is_file($file)) { 512 | throw new SensitiveException('Sensitive thesaurus file does not exist', 7); 513 | } 514 | 515 | $file = $this->getSensitiveWordPath($file); 516 | 517 | if (is_string($data) && str_contains($data, '|')) { 518 | $data = explode('|', $data); 519 | } 520 | 521 | $data = array_filter((array) $data); 522 | 523 | // 追加模式 524 | if ($append === true) { 525 | $bool = file_put_contents($file, PHP_EOL . 
implode(PHP_EOL, $data), FILE_APPEND) !== false; 526 | } else { 527 | // 重写模式 528 | $words = []; 529 | foreach ($this->yieldToReadFile($file) as $word) { 530 | $words[] = trim($word); 531 | } 532 | 533 | $bool = file_put_contents($file, implode(PHP_EOL, array_unique(array_merge($words, $data)))) !== false; 534 | } 535 | 536 | // phar update file 537 | if ($bool) { 538 | $this->getSensitiveWordPath($file, true); 539 | } 540 | 541 | return $bool; 542 | } 543 | 544 | /** 545 | * 读取敏感词库文件 546 | * 547 | * @param string $file 548 | * 549 | * @throws \isszz\sensitive\SensitiveException 550 | */ 551 | protected function yieldToReadFile(string $file) 552 | { 553 | $handle = fopen($file, 'r'); 554 | 555 | if (!$handle) { 556 | throw new SensitiveException('Read file failed', 4); 557 | } 558 | 559 | while (!feof($handle)) { 560 | $line = fgets($handle); 561 | if (!is_string($line)) { 562 | continue; 563 | } 564 | 565 | yield str_replace(['\'', ' ', PHP_EOL, ','], '', $line); 566 | } 567 | 568 | fclose($handle); 569 | } 570 | 571 | /** 572 | * 将单个敏感词构建成树结构 573 | */ 574 | protected function buildWordToTree(string $word = '') 575 | { 576 | if ($word === '') { 577 | return; 578 | } 579 | 580 | $tree = $this->wordTree; 581 | 582 | $wordLength = sensitive_mb_strlen($word, 'utf-8'); 583 | for ($i = 0; $i < $wordLength; $i++) { 584 | $keyChar = mb_substr($word, $i, 1, 'utf-8'); 585 | 586 | // 获取子节点树结构 587 | $tempTree = $tree->get($keyChar); 588 | 589 | if ($tempTree) { 590 | $tree = $tempTree; 591 | } else { 592 | // 设置标志位 593 | $newTree = new HashMap; 594 | $newTree->put('isEnd', false); 595 | 596 | // 添加到集合 597 | $tree->put($keyChar, $newTree); 598 | $tree = $newTree; 599 | } 600 | 601 | // 到达最后一个节点 602 | if ($i == $wordLength - 1) { 603 | $tree->put('isEnd', true); 604 | } 605 | } 606 | 607 | return; 608 | } 609 | 610 | /** 611 | * 敏感词替换为对应长度的字符 612 | * @param $word 613 | * @param $char 614 | * 615 | * @return string 616 | * @throws \DfaFilter\Exceptions\PdsSystemException 617 | 
*/ 618 | protected function dfaBadWordConversChars($word, $char) 619 | { 620 | $str = ''; 621 | $length = sensitive_mb_strlen($word, 'utf-8'); 622 | 623 | for ($counter = 0; $counter < $length; ++$counter) { 624 | $str .= $char; 625 | } 626 | 627 | return $str; 628 | } 629 | 630 | /** 631 | * 检测干扰因子 632 | * 633 | * @param string $word 634 | * 635 | * @return bool 636 | */ 637 | protected function checkInterferenceFactor(string $word) 638 | { 639 | return in_array($word, $this->interferenceFactors); 640 | } 641 | 642 | /** 643 | * @param $path 644 | * @return string 645 | */ 646 | protected static function getSensitiveWordPath($path, $update = false) 647 | { 648 | static $pathMaps = []; 649 | if (!class_exists(\Phar::class, false) || !\Phar::running()) { 650 | return $path; 651 | } 652 | 653 | $tmpPath = sys_get_temp_dir() ?: '/tmp'; 654 | $filePath = "$tmpPath/" . basename($path); 655 | clearstatcache(); 656 | 657 | if ((!isset($pathMaps[$path]) || !is_file($filePath)) || $update === true) { 658 | file_put_contents($filePath, file_get_contents($path)); 659 | $pathMaps[$path] = $filePath; 660 | } 661 | 662 | return $pathMaps[$path]; 663 | } 664 | } 665 | -------------------------------------------------------------------------------- /src/SensitiveException.php: -------------------------------------------------------------------------------- 1 | true, 5 | 6 | // 支持file,array,也可以指向自己敏感词库文件路径 7 | // file模式时,敏感词库位于webman根目录的config/plugin/isszz/webman-sensitive/SensitiveWord.txt,也可以指向自定义的词库文件路径 8 | 'mode' => 'file', 9 | 'config' => [ 10 | 'repeat' => true, // 重复替换为敏感词相同长度的字符 11 | 'replace_char' => '*', // 替换字符 12 | // 标记敏感词,标签生成敏感词 13 | 'mark' => 'mark', 14 | ], 15 | 16 | // 干扰因子 17 | 'interference_factors' => [ 18 | ' ', '&', '*', '/', '|', '@', '.', '^', '~', '$', 19 | ], 20 | 21 | // 数组模式敏感词 22 | 'sensitive_words' => [ 23 | '工口', 24 | '里番', 25 | '性感美女', 26 | ] 27 | ]; 28 | -------------------------------------------------------------------------------- 
/src/config/plugin/isszz/webman-sensitive/bootstrap.php: -------------------------------------------------------------------------------- 1 | {$name}(... $arguments); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/helpers.php: -------------------------------------------------------------------------------- 1 |