deny() {
18 | // 数据库返回的各种信息
19 | return Arrays.asList("广告");
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordCharIgnore.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api;
2 |
3 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
4 |
5 | /**
6 | * Decides whether a particular character should be ignored.
7 | * @since 0.11.0
8 | */
9 | public interface ISensitiveWordCharIgnore {
10 |
11 | /**
12 | * Tells whether the current character should be ignored.
13 | * @param ix index of the character within {@code text}
14 | * @param text the string being inspected
15 | * @param innerContext context
16 | * @return whether the character should be ignored
17 | */
18 | boolean ignore(final int ix,
19 | final String text,
20 | InnerSensitiveWordContext innerContext);
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/spring/annotation/Autowired.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.spring.annotation;
2 |
3 | import java.lang.annotation.ElementType;
4 | import java.lang.annotation.Retention;
5 | import java.lang.annotation.RetentionPolicy;
6 | import java.lang.annotation.Target;
7 |
8 | /** Runtime-retained marker annotation declared in the test sources; carries a single {@code required} flag that defaults to {@code true}.
9 | * @author binbin.hou
10 | * @since 1.0.0
11 | */
12 | @Target({ElementType.CONSTRUCTOR, ElementType.METHOD, ElementType.PARAMETER, ElementType.FIELD, ElementType.ANNOTATION_TYPE})
13 | @Retention(RetentionPolicy.RUNTIME)
14 | public @interface Autowired {
15 |
16 | boolean required() default true;
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/IWordReplace.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api;
2 |
3 | /**
4 | * Strategy for replacing a sensitive word.
5 | *
6 | * @author binbin.hou
7 | * @since 0.2.0
8 | */
9 | public interface IWordReplace {
10 |
11 | /**
12 | * Performs the replacement.
13 | *
14 | * Note: the earlier string-returning form was dropped to reduce object creation and improve performance.
15 | *
16 | * @param stringBuilder string builder used for concatenation
17 | * @param rawText the original string
18 | * @param wordResult the current sensitive-word result
19 | * @param wordContext context
20 | * @since 0.4.0
21 | */
22 | void replace(final StringBuilder stringBuilder, final String rawText, final IWordResult wordResult, final IWordContext wordContext);
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenyEmpty.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.deny;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.util.io.StreamUtil;
5 | import com.github.houbb.sensitive.word.api.IWordDeny;
6 |
7 | import java.util.ArrayList;
8 | import java.util.Collections;
9 | import java.util.List;
10 |
11 | /**
12 | * Empty implementation: {@code deny()} returns a new, empty list on every call.
13 | * @author binbin.hou
14 | * @since 0.19.0
15 | */
16 | @ThreadSafe
17 | public class WordDenyEmpty implements IWordDeny {
18 |
19 | @Override
20 | public List deny() {
21 | return new ArrayList<>();
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowEmpty.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.allow;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.util.io.StreamUtil;
5 | import com.github.houbb.sensitive.word.api.IWordAllow;
6 |
7 | import java.util.ArrayList;
8 | import java.util.Collections;
9 | import java.util.List;
10 |
11 | /**
12 | * Empty list: {@code allow()} returns a new, empty list on every call.
13 | * @author binbin.hou
14 | * @since 0.19.0
15 | */
16 | @ThreadSafe
17 | public class WordAllowEmpty implements IWordAllow {
18 |
19 | @Override
20 | public List allow() {
21 | return new ArrayList<>();
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/ignore/SensitiveWordCharIgnores.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.ignore;
2 |
3 | import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
4 |
5 | /** Static factory methods for {@link ISensitiveWordCharIgnore} strategies; {@code defaults()} delegates to {@code none()}.
6 | * @since 0.11.0
7 | */
8 | public class SensitiveWordCharIgnores {
9 |
10 | public static ISensitiveWordCharIgnore specialChars() {
11 | return new SpecialCharSensitiveWordCharIgnore();
12 | }
13 |
14 | public static ISensitiveWordCharIgnore none() {
15 | return new NoneSensitiveWordCharIgnore();
16 | }
17 |
18 | public static ISensitiveWordCharIgnore defaults() {
19 | return none();
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/spring/annotation/Bean.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.spring.annotation;
2 |
3 | import java.lang.annotation.ElementType;
4 | import java.lang.annotation.Retention;
5 | import java.lang.annotation.RetentionPolicy;
6 | import java.lang.annotation.Target;
7 |
8 | /** Runtime-retained annotation for methods and annotation types, declared in the test sources; all of its elements have empty defaults.
9 | * @author binbin.hou
10 | * @since 1.0.0
11 | */
12 | @Target({ElementType.METHOD, ElementType.ANNOTATION_TYPE})
13 | @Retention(RetentionPolicy.RUNTIME)
14 | public @interface Bean {
15 |
16 | String[] value() default {};
17 |
18 | String[] name() default {};
19 |
20 | String initMethod() default "";
21 |
22 | String destroyMethod() default "";
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditionAlwaysTrue.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordContext;
4 | import com.github.houbb.sensitive.word.api.IWordResult;
5 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
6 |
7 | /**
8 | * Condition that always holds: {@code doMatch} returns {@code true} regardless of its arguments.
9 | *
10 | * @since 0.13.0
11 | */
12 | public class WordResultConditionAlwaysTrue extends AbstractWordResultCondition {
13 |
14 | @Override
15 | protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
16 | return true;
17 | }
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/data/WordDatas.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.data;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordData;
4 |
5 | /**
6 | * Static factory for sensitive-word data ({@link IWordData}) implementations.
7 | *
8 | * @author binbin.hou
9 | * @since 0.3.0
10 | */
11 | public final class WordDatas {
12 |
13 | private WordDatas(){}
14 |
15 | /**
16 | * Default strategy; delegates to {@link #tree()}.
17 | * @return the strategy
18 | * @since 0.3.0
19 | */
20 | public static IWordData defaults() {
21 | return tree();
22 | }
23 |
24 | /**
25 | * Tree mode.
26 | * @return a new {@code WordDataTree}
27 | * @since 0.7.0
28 | */
29 | public static IWordData tree() {
30 | return new WordDataTree();
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api;
2 |
3 | /**
4 | * Result of a sensitive-word match.
5 | * @author binbin.hou
6 | * @since 0.1.0
7 | */
8 | public interface IWordResult {
9 |
10 | /**
11 | * Start index.
12 | * @return the start index
13 | * @since 0.1.0
14 | */
15 | int startIndex();
16 |
17 | /**
18 | * End index.
19 | * @return the end index
20 | * @since 0.1.0
21 | */
22 | int endIndex();
23 |
24 | /**
25 | * Type (category).
26 | * @return the type
27 | * @since 0.14.0
28 | */
29 | String type();
30 |
31 | /**
32 | * The word that actually matched; convenient for uniform tag handling and for troubleshooting.
33 | * @return the matched word
34 | * @since 0.25.1
35 | */
36 | String word();
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/IWordWarmUp.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api;
2 |
3 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
4 |
5 | import java.util.List;
6 |
7 | /**
8 | * Warms up ahead of time, triggering class loading, JIT optimization and so on.
9 | * @author binbin.hou
10 | * @since 0.29.0
11 | */
12 | public interface IWordWarmUp {
13 |
14 | /**
15 | * Warm up.
16 | * @param sensitiveWordBs the bootstrap instance itself
17 | * @param wordContext context
18 | * @param wordAllowList allow list
19 | * @param wordDenyList deny list
20 | */
21 | void warmUp(final SensitiveWordBs sensitiveWordBs,
22 | final IWordContext wordContext,
23 | final List wordAllowList,
24 | final List wordDenyList);
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/ignore/AbstractSensitiveWordCharIgnore.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.ignore;
2 |
3 | import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
4 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
5 |
6 | /**
7 | * Abstract base implementation: {@code ignore} delegates directly to {@code doIgnore}.
8 | * @since 0.11.0
9 | */
10 | public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore {
11 |
12 | protected abstract boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext);
13 |
14 | @Override
15 | public boolean ignore(int ix, String text, InnerSensitiveWordContext innerContext) {
16 | return doIgnore(ix, text, innerContext);
17 | }
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/IWordResultCondition.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api;
2 |
3 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
4 |
5 | /**
6 | * Decides whether a sensitive-word result matches.
7 | * @author binbin.hou
8 | * @since 0.13.0
9 | */
10 | public interface IWordResultCondition {
11 |
12 | /**
13 | * Tells whether the result matches.
14 | * @param wordResult the result produced by word matching
15 | * @param text the original text
16 | * @param modeEnum the mode enum value
17 | * @param context context
18 | * @return whether it matches
19 | * @since 0.13.0
20 | */
21 | boolean match(final IWordResult wordResult,
22 | final String text,
23 | final WordValidModeEnum modeEnum,
24 | final IWordContext context);
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/doc/发布流程.md:
--------------------------------------------------------------------------------
1 | # push to mvn center
2 |
3 | 确认版本为 release
4 |
5 | ```
6 | mvn clean deploy -P release
7 | ```
8 |
9 | # commit to github
10 |
11 | ```
12 | git push
13 | ```
14 |
15 | # merge to master
16 |
17 | ```
18 | git checkout master
19 | git pull
20 | git checkout branch
21 | git rebase master  # 用 rebase 合并主干的修改,如果有冲突在此时解决
22 | git checkout master
23 | git merge branch
24 | git push
25 | ```
26 |
27 | # create new branch & checkout
28 |
29 | ```
30 | git branch release_XXX
31 | git checkout release_XXX
32 | ```
33 |
34 | # modify project version
35 |
36 | ```
37 | mvn versions:set -DgroupId=com.github.houbb -DartifactId=paradise* -DoldVersion=1.1.1 -DnewVersion=1.1.2-SNAPSHOT
38 | mvn -N versions:update-child-modules
39 | mvn versions:commit
40 | ```
41 |
42 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTypeEnum.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.constant.enums;
2 |
3 | /**
4 | * Word type categories; each constant carries a code and a description.
5 | * @since 0.14.0
6 | */
7 | public enum WordTypeEnum {
8 | WORD("WORD", "敏感词"),
9 | EMAIL("EMAIL", "邮箱"),
10 | URL("URL", "链接"),
11 | NUM("NUM", "数字"),
12 | IPV4("IPV4", "IPv4"),
13 |
14 | DEFAULTS("DEFAULTS", "默认"),
15 | ;
16 |
17 | private final String code;
18 | private final String desc;
19 |
20 | WordTypeEnum(String code, String desc) {
21 | this.code = code;
22 | this.desc = desc;
23 | }
24 |
25 | public String getCode() {
26 | return code;
27 | }
28 |
29 | public String getDesc() {
30 | return desc;
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatNone.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordFormat;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 |
7 | /**
8 | * No processing: {@code format} returns the character unchanged.
9 | *
10 | * @author binbin.hou
11 | * @since 0.0.5
12 | */
13 | @ThreadSafe
14 | public class WordFormatNone implements IWordFormat {
15 |
16 | private static final IWordFormat INSTANCE = new WordFormatNone();
17 |
18 | public static IWordFormat getInstance() {
19 | return INSTANCE;
20 | }
21 |
22 | @Override
23 | public char format(char original, IWordContext context) {
24 | return original;
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordAllowDenyCombine.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api.combine;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordContext;
4 |
5 | import java.util.Collection;
6 | import java.util.List;
7 |
8 | /**
9 | * @author d
10 | * @since 0.8.0
11 | */
12 | public interface IWordAllowDenyCombine {
13 |
14 | /**
15 | * Gets the final list of denied words.
16 | * @param allowList allow list
17 | * @param denyList deny list
18 | * @param context context
19 | * @return the result
20 | * @since 0.8.0
21 | */
22 | Collection getActualDenyList(final List allowList,
23 | final List denyList,
24 | final IWordContext context);
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTagMap.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.tag;
2 |
3 | import com.github.houbb.heaven.util.common.ArgUtil;
4 |
5 | import java.util.Map;
6 | import java.util.Set;
7 |
8 | /**
9 | * Word-tag lookup initialized from a map.
10 | *
11 | * key: the word
12 | * value: the set of tags
13 | *
14 | * @since 0.24.0
15 | */
16 | public class WordTagMap extends AbstractWordTag {
17 |
18 | private final Map> wordTagMap;
19 |
20 | public WordTagMap(Map> wordTagMap) {
21 | ArgUtil.notNull(wordTagMap, "wordTagMap");
22 | this.wordTagMap = wordTagMap;
23 | }
24 |
25 | @Override
26 | protected Set doGetTag(String word) {
27 | return wordTagMap.get(word);
28 | }
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * project: sensitive-word-SensitiveWordBsTest
10 | * create on 2020/1/7 23:43
11 | *
12 | * @author Administrator
13 | * @since 0.0.6
14 | */
15 | public class SensitiveWordBsChineseTest {
16 |
17 | /**
18 | * Ignores the difference between traditional and simplified Chinese.
19 | * @since 0.0.6
20 | */
21 | @Test
22 | public void ignoreChineseStyleTest() {
23 | final String text = "我爱我的祖国和五星紅旗。";
24 |
25 | List wordList = SensitiveWordBs.newInstance().init().findAll(text);
26 | Assert.assertEquals("[五星紅旗]", wordList.toString());
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * project: sensitive-word-SensitiveWordBsTest
10 | * create on 2020/1/7 23:43
11 | *
12 | * @author Administrator
13 | * @since 0.0.6
14 | */
15 | public class SensitiveWordBsEnglishTest {
16 |
17 | /**
18 | * Ignores variations in how English letters are written.
19 | * @since 0.0.6
20 | */
21 | @Test
22 | public void ignoreEnglishStyleTest() {
23 | final String text = "Ⓕⓤc⒦ the bad words";
24 |
25 | List wordList = SensitiveWordBs.newInstance().init().findAll(text);
26 | Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString());
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTagSystem.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.tag;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordTag;
4 | import com.github.houbb.sensitive.word.utils.InnerStreamUtils;
5 |
6 | import java.util.List;
7 | import java.util.Set;
8 |
9 | /**
10 | * Built-in system strategy; tags are loaded from the default file {@code /sensitive_word_tags.txt}.
11 | *
12 | * @since 0.24.0
13 | */
14 | public class WordTagSystem extends AbstractWordTag {
15 |
16 | private final IWordTag wordTag;
17 |
18 | public WordTagSystem() {
19 | List lines = InnerStreamUtils.readAllLines("/sensitive_word_tags.txt");
20 | this.wordTag = WordTags.lines(lines);
21 | }
22 |
23 | @Override
24 | protected Set doGetTag(String word) {
25 | return wordTag.getTag(word);
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreCase.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordFormat;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 |
7 | /**
8 | * Ignores letter case: {@code format} maps the character through {@code Character.toLowerCase}.
9 | * @author binbin.hou
10 | * @since 0.0.5
11 | */
12 | @ThreadSafe
13 | public class WordFormatIgnoreCase implements IWordFormat {
14 |
15 | private static final IWordFormat INSTANCE = new WordFormatIgnoreCase();
16 |
17 | public static IWordFormat getInstance() {
18 | return INSTANCE;
19 | }
20 |
21 | @Override
22 | public char format(char original, IWordContext context) {
23 | return Character.toLowerCase(original);
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * project: sensitive-word-SensitiveWordBsTest
10 | * create on 2020/1/7 23:43
11 | *
12 | * @author Administrator
13 | * @since 0.0.8
14 | */
15 | public class SensitiveWordBsUserDefineTest {
16 |
17 | /**
18 | * User-defined allow and deny files.
19 | * @since 0.0.8
20 | */
21 | @Test
22 | public void allowAndDenyTest() {
23 | final String text = "gender 我们认为应该通过,自定义敏感词我们认为应该拒绝。";
24 |
25 | List wordList = SensitiveWordBs.newInstance().init().findAll(text);
26 | Assert.assertEquals("[自定义敏感词]", wordList.toString());
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/constant/WordConst.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.constant;
2 |
3 | /**
4 | * project: sensitive-word-AppConst
5 | * create on 2020/1/7 23:39
6 | *
7 | * @author Administrator
8 | * @since 0.0.1
9 | */
10 | public final class WordConst {
11 |
12 | private WordConst(){}
13 |
14 | /**
15 | * Marker indicating an end position.
16 | * ps: from a certain point of view, I am not very fond of this style.
17 | * (1) A normal char occupies only one character, so using two characters here is sufficient and reduces the capacity needed by the Map.
18 | * @since 0.0.1
19 | */
20 | public static final String IS_END = "ED";
21 |
22 | /**
23 | * Maximum length of a web-site address.
24 | * @since 0.3.0
25 | */
26 | public static final int MAX_WEB_SITE_LEN = 70;
27 |
28 | /**
29 | * Maximum length of an email address.
30 | * @since 0.4.0
31 | */
32 | public static final int MAX_EMAIL_LEN = 64;
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/tag/AbstractWordTag.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.tag;
2 |
3 | import com.github.houbb.heaven.util.lang.StringUtil;
4 | import com.github.houbb.sensitive.word.api.IWordTag;
5 |
6 | import java.util.Collections;
7 | import java.util.Set;
8 |
9 | /**
10 | * Abstract word tag: {@code getTag} returns an empty set when {@code StringUtil.isEmpty(word)} holds, otherwise delegates to {@code doGetTag}.
11 | *
12 | * @since 0.10.0
13 | */
14 | public abstract class AbstractWordTag implements IWordTag {
15 |
16 |
17 | /**
18 | * Gets the tags.
19 | * @param word the word
20 | * @return the result
21 | */
22 | protected abstract Set doGetTag(String word);
23 |
24 | @Override
25 | public Set getTag(String word) {
26 | if(StringUtil.isEmpty(word)) {
27 | return Collections.emptySet();
28 | }
29 |
30 | return doGetTag(word);
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/exception/SensitiveWordException.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.exception;
2 |
3 | /** Unchecked exception whose constructors each forward to the matching {@link RuntimeException} constructor.
4 | * @author binbin.hou
5 | * @since 0.0.1
6 | */
7 | public class SensitiveWordException extends RuntimeException {
8 |
9 | public SensitiveWordException() {
10 | }
11 |
12 | public SensitiveWordException(String message) {
13 | super(message);
14 | }
15 |
16 | public SensitiveWordException(String message, Throwable cause) {
17 | super(message, cause);
18 | }
19 |
20 | public SensitiveWordException(Throwable cause) {
21 | super(cause);
22 | }
23 |
24 | public SensitiveWordException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
25 | super(message, cause, enableSuppression, writableStackTrace);
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.util.util.regex.RegexUtil;
4 | import com.github.houbb.sensitive.word.api.IWordCheck;
5 |
6 | /**
7 | * (1) For now, web-site detection is handled only roughly.
8 | * (2) If the URL ends with an image type, it is skipped.
9 | * (3) If the length exceeds 70, checking stops immediately.
10 | *
11 | * Strategy variant that does not require a prefix; {@code isUrl} delegates to {@code RegexUtil.isWebSite}.
12 | *
13 | * @author binbin.hou
14 | * @since 0.25.0
15 | */
16 | public class WordCheckUrlNoPrefix extends WordCheckUrl {
17 |
18 | /**
19 | * @since 0.3.0
20 | */
21 | private static final IWordCheck INSTANCE = new WordCheckUrlNoPrefix();
22 |
23 | public static IWordCheck getInstance() {
24 | return INSTANCE;
25 | }
26 |
27 | @Override
28 | protected boolean isUrl(String text) {
29 | return RegexUtil.isWebSite(text);
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordFormat;
6 | import com.github.houbb.sensitive.word.utils.InnerCharUtils;
7 |
8 | /**
9 | * Normalizes character width: {@code format} delegates to {@code InnerCharUtils.toHalfWidth}.
10 | * @author binbin.hou
11 | * @since 0.0.5
12 | */
13 | @ThreadSafe
14 | public class WordFormatIgnoreWidth implements IWordFormat {
15 |
16 | private static final IWordFormat INSTANCE = new WordFormatIgnoreWidth();
17 |
18 | public static IWordFormat getInstance() {
19 | return INSTANCE;
20 | }
21 |
22 | @Override
23 | public char format(char original, IWordContext context) {
24 | return InnerCharUtils.toHalfWidth(original);
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreChineseStyle.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.opencc4j.util.ZhSlimUtil;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 | import com.github.houbb.sensitive.word.api.IWordFormat;
7 |
8 | /**
9 | * Ignores Chinese writing style: {@code format} delegates to {@code ZhSlimUtil.toSimple}.
10 | * @author binbin.hou
11 | * @since 0.0.5
12 | */
13 | @ThreadSafe
14 | public class WordFormatIgnoreChineseStyle implements IWordFormat {
15 |
16 | private static final IWordFormat INSTANCE = new WordFormatIgnoreChineseStyle();
17 |
18 | public static IWordFormat getInstance() {
19 | return INSTANCE;
20 | }
21 |
22 | @Override
23 | public char format(char original, IWordContext context) {
24 | return ZhSlimUtil.toSimple(original);
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * project: sensitive-word-SensitiveWordBsTest
10 | * create on 2020/1/7 23:43
11 | *
12 | * @author Administrator
13 | * @since 0.0.7
14 | */
15 | public class SensitiveWordBsRepeatTest {
16 |
17 | /**
18 | * Ignores repeated characters ({@code ignoreRepeat(true)}). NOTE(review): the method name says "ChineseStyle" but the test exercises ignoreRepeat; looks like a copy-paste leftover, confirm before renaming.
19 | * @since 0.0.7
20 | */
21 | @Test
22 | public void ignoreChineseStyleTest() {
23 | final String text = "ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦ the bad words";
24 |
25 | List wordList = SensitiveWordBs.newInstance()
26 | .ignoreRepeat(true)
27 | .init()
28 | .findAll(text);
29 | Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.allow;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordAllow;
5 | import com.github.houbb.sensitive.word.utils.InnerStreamUtils;
6 |
7 | import java.util.List;
8 |
9 | /**
10 | * System default entries: {@code allow()} reads the lines of {@code /sensitive_word_allow.txt} on each call.
11 | * @author binbin.hou
12 | * @since 0.0.13
13 | */
14 | @ThreadSafe
15 | public class WordAllowSystem implements IWordAllow {
16 |
17 | /**
18 | * @since 0.3.0
19 | */
20 | private static final WordAllowSystem INSTANCE = new WordAllowSystem();
21 |
22 | public static WordAllowSystem getInstance() {
23 | return INSTANCE;
24 | }
25 |
26 | @Override
27 | public List allow() {
28 | return InnerStreamUtils.readAllLines("/sensitive_word_allow.txt");
29 | }
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsDataTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import com.github.houbb.sensitive.word.support.data.WordDatas;
4 | import org.junit.Assert;
5 | import org.junit.Test;
6 |
7 | /**
8 | * project: sensitive-word-SensitiveWordBsConfigTest
9 | * create on 2020/1/7 23:43
10 | * Checks that an explicitly configured {@code WordDatas.tree()} word data still finds the expected words.
11 | * @author Administrator
12 | * @since 0.7.0
13 | */
14 | public class SensitiveWordBsDataTest {
15 |
16 | @Test
17 | public void wordDataConfigTest() {
18 | SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
19 | .wordData(WordDatas.tree())
20 | .init();
21 |
22 | final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
23 | Assert.assertTrue(wordBs.contains(text));
24 | Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordBs.findAll(text).toString());
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/combine/format/AbstractWordFormatCombine.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.combine.format;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordFormat;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine;
6 | import com.github.houbb.sensitive.word.support.format.WordFormats;
7 |
8 | import java.util.List;
9 |
10 | /** Base class that builds the combined format by passing the list from {@code getWordFormatList} to {@code WordFormats.chains}.
11 | * @author d
12 | * @since 0.8.0
13 | */
14 | public abstract class AbstractWordFormatCombine implements IWordFormatCombine {
15 |
16 | protected abstract List getWordFormatList(IWordContext context);
17 |
18 | @Override
19 | public IWordFormat initWordFormat(IWordContext context) {
20 | List list = getWordFormatList(context);
21 | return WordFormats.chains(list);
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/combine/check/AbstractWordCheckCombine.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.combine.check;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordCheck;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine;
6 | import com.github.houbb.sensitive.word.support.check.WordChecks;
7 |
8 | import java.util.List;
9 |
10 | /** Base class that builds the combined check by passing the list from {@code getWordCheckList} to {@code WordChecks.chains}.
11 | * @author d
12 | * @since 0.8.0
13 | */
14 | public abstract class AbstractWordCheckCombine implements IWordCheckCombine {
15 |
16 | protected abstract List getWordCheckList(IWordContext context);
17 |
18 | @Override
19 | public IWordCheck initWordCheck(IWordContext context) {
20 | List wordCheckList = getWordCheckList(context);
21 |
22 | return WordChecks.chains(wordCheckList);
23 | }
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/ignore/SpecialCharSensitiveWordCharIgnore.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.ignore;
2 |
3 | import com.github.houbb.heaven.util.lang.StringUtil;
4 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
5 |
6 | import java.util.Set;
7 |
8 | /**
9 | * Ignores special characters: the character at {@code ix} is ignored when it is contained in the set built from {@code SPECIAL}.
10 | * @since 0.11.0
11 | */
12 | public class SpecialCharSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
13 |
14 | private static final String SPECIAL = "`-=~!@#$%^&*()_+[]{}\\|;:'\",./<>?";
15 |
16 | private static final Set SET;
17 |
18 | static {
19 | SET = StringUtil.toCharSet(SPECIAL);
20 | }
21 |
22 | @Override
23 | protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) {
24 | char c = text.charAt(ix);
25 | return SET.contains(c);
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/result/AbstractWordResultHandler.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.result;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordContext;
4 | import com.github.houbb.sensitive.word.api.IWordResult;
5 | import com.github.houbb.sensitive.word.api.IWordResultHandler;
6 |
7 | /**
8 | * Abstract result handler: returns null for a null word result, otherwise delegates to doHandle.
9 | *
10 | * @since 0.12.0
11 | * @param <R> type of the handled result
12 | */
13 | public abstract class AbstractWordResultHandler implements IWordResultHandler {
14 |
15 | protected abstract R doHandle(IWordResult wordResult, IWordContext wordContext, String originalText);
16 |
17 | @Override
18 | public R handle(IWordResult wordResult, IWordContext wordContext, String originalText) {
19 | if(wordResult == null) {
20 | return null;
21 | }
22 |
23 | return doHandle(wordResult, wordContext, originalText);
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/result/WordTagsDto.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.result;
2 |
3 | import java.io.Serializable;
4 | import java.util.Set;
5 |
6 | /** Serializable DTO that carries a word together with its set of tags.
7 | * @since 0.12.0
8 | */
9 | public class WordTagsDto implements Serializable {
10 |
11 | private String word; // the word
12 |
13 | private Set tags; // tags attached to the word
14 |
15 | public String getWord() {
16 | return word;
17 | }
18 |
19 | public void setWord(String word) {
20 | this.word = word;
21 | }
22 |
23 | public Set getTags() {
24 | return tags;
25 | }
26 |
27 | public void setTags(Set tags) {
28 | this.tags = tags;
29 | }
30 |
31 | @Override
32 | public String toString() {
33 | return "WordTagsDto{" +
34 | "word='" + word + '\'' +
35 | ", tags=" + tags +
36 | '}';
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/utils/InnerStreamUtils.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.utils;
2 |
3 | import com.github.houbb.heaven.util.io.StreamUtil;
4 |
5 | import java.io.IOException;
6 | import java.io.InputStream;
7 | import java.util.Collections;
8 | import java.util.List;
9 |
10 | /** Stream helper that tolerates a missing resource.
11 | * @since 0.27.1
12 | */
13 | public class InnerStreamUtils {
14 |
15 | /**
16 | * Reads all lines of a resource, tolerating the case where the resource does not exist.
17 | * @param path resource path
18 | * @return an empty list when the resource is not found, otherwise the result of StreamUtil.readAllLines
19 | */
20 | public static List readAllLines(String path) {
21 | try(InputStream inputStream = StreamUtil.class.getResourceAsStream(path);) { // existence probe only; closed again by try-with-resources
22 | if(inputStream == null) {
23 | return Collections.emptyList(); // immutable list: callers must not add to it
24 | }
25 | } catch (IOException e) {
26 | throw new RuntimeException(e);
27 | }
28 |
29 | return StreamUtil.readAllLines(path);
30 | }
31 |
32 | }
33 |
34 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.result;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordResult;
6 |
7 | /**
8 | * Handler that performs no processing: the word result is returned unchanged.
9 | * @author binbin.hou
10 | * @since 0.1.0
11 | */
12 | @ThreadSafe
13 | public class WordResultHandlerRaw extends AbstractWordResultHandler {
14 |
15 | /**
16 | * @since 0.3.0
17 | */
18 | private static final WordResultHandlerRaw INSTANCE = new WordResultHandlerRaw();
19 |
20 | public static WordResultHandlerRaw getInstance() {
21 | return INSTANCE;
22 | }
23 |
24 | @Override
25 | protected IWordResult doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
26 | return wordResult;
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/AbstractWordResultCondition.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordContext;
4 | import com.github.houbb.sensitive.word.api.IWordResult;
5 | import com.github.houbb.sensitive.word.api.IWordResultCondition;
6 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
7 |
8 | /**
9 | * Abstract result condition: match delegates directly to doMatch.
10 | *
11 | * @since 0.13.0
12 | */
13 | public abstract class AbstractWordResultCondition implements IWordResultCondition {
14 |
15 | protected abstract boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context);
16 |
17 | @Override
18 | public boolean match(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
19 | return doMatch(wordResult, text, modeEnum, context);
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/mapping/AbstractWordFormatText.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format.mapping;
2 |
3 | import com.github.houbb.heaven.util.lang.StringUtil;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordFormatText;
6 |
7 | import java.util.Collections;
8 | import java.util.HashMap;
9 | import java.util.Map;
10 |
11 | /**
12 | * Abstract text formatter: returns an empty map when StringUtil.isEmpty(text) holds, otherwise delegates to doFormat.
13 | * @author binbin.hou
14 | * @since 0.28.0
15 | */
16 | public abstract class AbstractWordFormatText implements IWordFormatText {
17 |
18 | protected abstract Map doFormat(String text, IWordContext context);
19 |
20 | @Override
21 | public Map format(String text, IWordContext context) {
22 | if(StringUtil.isEmpty(text)) {
23 | return Collections.emptyMap(); // immutable empty map for empty input
24 | }
25 |
26 | return doFormat(text, context);
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaces.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.replace;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordReplace;
4 |
5 | /**
6 | * Factory methods for character replacement strategies.
7 | *
8 | * @author binbin.hou
9 | * @since 0.3.0
10 | */
11 | public final class WordReplaces {
12 |
13 | private WordReplaces(){}
14 |
15 | /**
16 | * Replacement with the given character.
17 | * @param c replacement character
18 | * @return a new WordReplaceChar using c
19 | * @since 0.3.0
20 | */
21 | public static IWordReplace chars(final char c) {
22 | return new WordReplaceChar(c);
23 | }
24 |
25 | /**
26 | * Replacement with the default character, which is *.
27 | * @return a new WordReplaceChar created with its no-argument constructor
28 | * @since 0.3.0
29 | */
30 | public static IWordReplace chars() {
31 | return new WordReplaceChar();
32 | }
33 |
34 | /**
35 | * Default strategy: same as chars(), i.e. replacement with *.
36 | * @return the result of chars()
37 | * @since 0.7.0
38 | */
39 | public static IWordReplace defaults() {
40 | return chars();
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTagType.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.constant.enums;
2 |
3 | /**
4 | * Word tag categories, each pairing a code with a description.
5 | *
6 | * @since 0.24.0
7 | */
8 | public enum WordTagType {
9 | ZHENGZHI("0", "政治"),
10 | DUPIN("1", "毒品"),
11 | SEQING("2", "色情"),
12 | DUBO("3", "赌博"),
13 | FANZUI("4", "违法犯罪"),
14 | ;
15 |
16 | private final String code;
17 | private final String desc;
18 |
19 | WordTagType(String code, String desc) {
20 | this.code = code;
21 | this.desc = desc;
22 | }
23 |
24 | public String getCode() {
25 | return code;
26 | }
27 |
28 | public String getDesc() {
29 | return desc;
30 | }
31 |
32 | public static String getDescByCode(final String code) { // maps a code to its description
33 | for(WordTagType tagType : WordTagType.values()) {
34 | if(tagType.code.equals(code)) {
35 | return tagType.desc;
36 | }
37 | }
38 | return code; // unknown code: the input is returned as-is
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/data/WordCountDto.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.data;
2 |
3 | /**
4 | * Test DTO pairing a code with its count; naturally ordered by ascending count.
5 | */
6 | public class WordCountDto implements Comparable<WordCountDto> {
7 |
8 | private String code;
9 | private int count;
10 |
11 | public WordCountDto(String code, int count) {
12 | this.code = code;
13 | this.count = count;
14 | }
15 |
16 | public String getCode() {
17 | return code;
18 | }
19 |
20 | public void setCode(String code) {
21 | this.code = code;
22 | }
23 |
24 | public int getCount() {
25 | return count;
26 | }
27 |
28 | public void setCount(int count) {
29 | this.count = count;
30 | }
31 |
32 | /**
33 | * Orders by count, ascending.
34 | * Uses Integer.compare instead of subtraction, because the difference of two ints can overflow and flip the sign.
35 | *
36 | * @param o the DTO to compare with
37 | * @return negative, zero or positive as this count is less than, equal to or greater than the other count
38 | */
39 | @Override
40 | public int compareTo(WordCountDto o) {
41 | return Integer.compare(this.count, o.count);
42 | }
43 |
44 | @Override
45 | public String toString() {
46 | return "{" +
47 | "n='" + code + '\'' +
48 | ", c=" + count +
49 | '}';
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bugs/b32/MyWordDenyChineseTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bugs.b32;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordDeny;
4 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
5 | import com.github.houbb.sensitive.word.support.deny.WordDenys;
6 | import org.junit.Assert;
7 | import org.junit.Test;
8 |
9 | public class MyWordDenyChineseTest {
10 |
11 | @Test
12 | public void test() {
13 | IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDenyChineseNum());
14 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
15 | .wordDeny(wordDeny)// any other configuration goes here
16 | .init();// init() initialises the sensitive-word dictionary
17 |
18 | final String text = "和我练习三三九乘元功、一军两策";
19 |
20 | // verify the detected words
21 | Assert.assertEquals("[三三九乘元功, 一军两策]", sensitiveWordBs.findAll(text).toString());
22 | Assert.assertTrue(sensitiveWordBs.contains("三三九乘元功"));
23 | Assert.assertTrue(sensitiveWordBs.contains("一军两策"));
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/spring/annotation/Configuration.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.spring.annotation;
2 |
3 | /*
4 | * Copyright 2002-2017 the original author or authors.
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | import java.lang.annotation.*;
20 |
21 |
22 | @Target(ElementType.TYPE)
23 | @Retention(RetentionPolicy.RUNTIME)
24 | @Documented
25 | @Component
26 | public @interface Configuration { // test-local annotation; NOTE(review): looks like a stand-in for Spring's @Configuration - confirm
27 |
28 | String value() default ""; // optional value, empty string by default
29 |
30 | }
31 |
32 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/support/resultcondition/MyWordTag.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.sensitive.word.support.tag.AbstractWordTag;
4 |
5 | import java.util.HashMap;
6 | import java.util.HashSet;
7 | import java.util.Map;
8 | import java.util.Set;
9 |
10 | /**
11 | * Custom word tag implementation backed by a fixed in-memory map.
12 | * @since 0.23.0
13 | */
14 | public class MyWordTag extends AbstractWordTag {
15 |
16 | private static Map> dataMap;
17 |
18 | static {
19 | dataMap = new HashMap<>();
20 | dataMap.put("商品", buildSet("广告", "中文"));
21 | dataMap.put("AV", buildSet("色情", "单词", "英文"));
22 | }
23 |
24 | private static Set buildSet(String... tags) { // collects the given tags into a new HashSet
25 | Set set = new HashSet<>();
26 | for(String tag : tags) {
27 | set.add(tag);
28 | }
29 | return set;
30 | }
31 |
32 | @Override
33 | protected Set doGetTag(String word) {
34 | return dataMap.get(word); // null when the word has no entry in dataMap
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.result;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordResult;
6 | import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
7 |
8 | /**
9 | * Handler that keeps only the word, returned as a String.
10 | *
11 | * @author binbin.hou
12 | * @since 0.1.0
13 | */
14 | @ThreadSafe
15 | public class WordResultHandlerWord extends AbstractWordResultHandler {
16 |
17 | /**
18 | * @since 0.3.0
19 | */
20 | private static final WordResultHandlerWord INSTANCE = new WordResultHandlerWord();
21 |
22 | public static WordResultHandlerWord getInstance() {
23 | return INSTANCE;
24 | }
25 |
26 | @Override
27 | protected String doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
28 | // cut the word out of the original text (presumably by the result's start/end indexes - confirm in InnerWordCharUtils)
29 | return InnerWordCharUtils.getString(originalText, wordResult);
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/IWordCheck.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api;
2 |
3 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
4 | import com.github.houbb.sensitive.word.support.check.WordCheckResult;
5 |
6 | /**
7 | * Sensitive-information check interface, for example:
8 | * (1) sensitive words
9 | * (2) numbers (8 or more consecutive digits)
10 | * (3) e-mail addresses
11 | * (4) URLs
12 | *
13 | * Checks can be combined with the chain-of-responsibility pattern and invoked in turn.
14 | * @author binbin.hou
15 | * @since 0.0.5
16 | */
17 | public interface IWordCheck {
18 |
19 | /**
20 | * Checks for sensitive content starting at the given index.
21 | *
22 | * (1) If no sensitive word is hit, the reported length is 0.
23 | * (2) If a sensitive word is hit, the reported length is the length of that word.
24 | *
25 | * ps: the result is meant to carry
26 | * 1. whether a sensitive word is contained,
27 | * 2. the length of the sensitive word,
28 | * 3. the length of the ordinary text already walked (so later replacement can avoid needless repeated loops).
29 | *
30 | * @param beginIndex start index
31 | * @param context execution context
32 | * @return check result describing the sensitive content found (originally just its length)
33 | * @since 0.0.5
34 | * @since 0.24.2 return value adjusted so that allow list and deny list are handled uniformly
35 | */
36 | WordCheckResult sensitiveCheck(final int beginIndex,
37 | final InnerSensitiveWordContext context);
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.deny;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordDeny;
5 | import com.github.houbb.sensitive.word.utils.InnerStreamUtils;
6 |
7 | import java.util.List;
8 |
9 | /**
10 | * System default deny list, read from the bundled dictionary resources.
11 | * @author binbin.hou
12 | * @since 0.0.13
13 | */
14 | @ThreadSafe
15 | public class WordDenySystem implements IWordDeny {
16 |
17 | /**
18 | * @since 0.3.0
19 | */
20 | private static final IWordDeny INSTANCE = new WordDenySystem();
21 |
22 | public static IWordDeny getInstance() {
23 | return INSTANCE;
24 | }
25 |
26 | /**
27 | * Collects the lines of the three system dictionary resources into one list.
28 | * The first result is copied into a fresh ArrayList before merging: InnerStreamUtils.readAllLines
29 | * returns the immutable Collections.emptyList() for a missing resource, and addAll on that list
30 | * throws UnsupportedOperationException as soon as a later dictionary is not empty.
31 | *
32 | * @return a new mutable list holding the lines of every dictionary that is present
33 | */
34 | @Override
35 | public List<String> deny() {
36 | List<String> results = new java.util.ArrayList<>(InnerStreamUtils.readAllLines("/sensitive_word_dict.txt"));
37 | results.addAll(InnerStreamUtils.readAllLines("/sensitive_word_dict_en.txt"));
38 | results.addAll(InnerStreamUtils.readAllLines("/sensitive_word_deny.txt"));
39 | return results;
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.result;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordResult;
4 | import com.github.houbb.sensitive.word.api.IWordResultHandler;
5 |
6 | /**
7 | * Factory methods for sensitive-word result handlers.
8 | * @author binbin.hou
9 | * @since 0.1.0
10 | */
11 | public final class WordResultHandlers {
12 |
13 | private WordResultHandlers(){}
14 |
15 | /**
16 | * No processing: the raw result is kept.
17 | * @return the shared WordResultHandlerRaw instance
18 | * @since 0.1.0
19 | */
20 | public static IWordResultHandler raw() {
21 | return WordResultHandlerRaw.getInstance();
22 | }
23 |
24 | /**
25 | * Keeps only the word.
26 | * @return the shared WordResultHandlerWord instance
27 | * @since 0.1.0
28 | */
29 | public static IWordResultHandler word() {
30 | return WordResultHandlerWord.getInstance();
31 | }
32 |
33 | /**
34 | * Handler producing the word together with its tags.
35 | * @return a new WordResultHandlerWordTags on every call
36 | * @since 0.12.0
37 | */
38 | public static IWordResultHandler wordTags() {
39 | return new WordResultHandlerWordTags();
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsSystemDictTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordAllow;
4 | import com.github.houbb.sensitive.word.api.IWordDeny;
5 | import com.github.houbb.sensitive.word.support.allow.WordAllows;
6 | import com.github.houbb.sensitive.word.support.deny.WordDenys;
7 | import com.github.houbb.sensitive.word.support.tag.WordTags;
8 | import org.junit.Assert;
9 | import org.junit.Test;
10 |
11 | import java.util.Arrays;
12 | import java.util.List;
13 |
14 | /**
15 | * project: sensitive-word-SensitiveWordBsTest
16 | * create on 2020/1/7 23:43
17 | *
18 | * @author Administrator
19 | * @since 0.27.0
20 | */
21 | public class SensitiveWordBsSystemDictTest {
22 |
23 | @Test
24 | public void configTest() { // smoke test: only checks that init() with the default allow/deny/tag sources completes; nothing is asserted
25 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
26 | .wordAllow(WordAllows.defaults())
27 | .wordDeny(WordDenys.defaults())
28 | .wordTag(WordTags.defaults())
29 | .init();
30 | }
31 |
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/spring/service/SensitiveWordService.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.spring.service;
2 |
3 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
4 | import com.github.houbb.sensitive.word.spring.annotation.Autowired;
5 | import com.github.houbb.sensitive.word.spring.annotation.Component;
6 |
7 | /** Example service wrapping the injected SensitiveWordBs.
8 | * @author binbin.hou
9 | * @since 1.0.0
10 | */
11 | @Component
12 | public class SensitiveWordService {
13 |
14 | @Autowired
15 | private SensitiveWordBs sensitiveWordBs;
16 |
17 | /**
18 | * Refreshes the word dictionary.
19 | *
20 | * Whenever the data in the database changes, first call the method that updates the stored sensitive words.
21 | * Then call this method for the change to take effect.
22 | *
23 | * Note (from the original author): re-initialising does not disturb use of the old dictionary; once it completes, the new one applies.
24 | */
25 | public void refresh() {
26 | // after every database change: update the stored sensitive words first, then call this method
27 | sensitiveWordBs.init();
28 | }
29 |
30 | /**
31 | * Whether the given text contains a sensitive word.
32 | *
33 | * Can be re-wrapped like this, or sensitiveWordBs can be used directly.
34 | * @param word the text to check
35 | * @return the result of sensitiveWordBs.contains(word)
36 | */
37 | public boolean contains(String word){
38 | return sensitiveWordBs.contains(word);
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/release.bat:
--------------------------------------------------------------------------------
1 | :: Releases the current project (Windows)
2 | :: author: houbb
3 | :: LastUpdateTime: 2018-1-22 09:08:52
4 | :: Usage: double-click to run, or type release.bat in cmd from this directory
5 |
6 | :: Turn off command echoing
7 | @echo OFF
8 |
9 | ECHO "============================= RELEASE START..."
10 |
11 | :: Version information (must be set manually)
12 | :::: version being released (called the "old version" by the original author)
13 | SET version=0.28.0
14 | :::: next version (not referenced anywhere else in this script)
15 | SET newVersion=0.29.0
16 | :::: group name
17 | SET groupName=com.github.houbb
18 | :::: project name
19 | SET projectName=sensitive-word
20 |
21 | :: Release the project version
22 | :::: snapshot version number
23 | SET snapshot_version=%version%"-SNAPSHOT"
24 | :::: release version number (same as version)
25 | SET release_version=%version%
26 |
27 | call mvn versions:set -DgroupId=%groupName% -DartifactId=%projectName% -DoldVersion=%snapshot_version% -DnewVersion=%release_version%
28 | call mvn -N versions:update-child-modules
29 | call mvn versions:commit
30 | call echo "1. RELEASE %snapshot_version% TO %release_version% DONE."
31 |
32 |
33 | :: Push to github
34 | git add .
35 | git commit -m "release branch %version%"
36 | git push
37 | git status
38 |
39 | ECHO "2. PUSH TO GITHUB DONE."
40 |
41 | :: Deploy to the Maven central repository
42 | call mvn clean deploy -P release
43 | ECHO "3 PUSH TO MVN CENTER DONE."
44 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MySensitiveTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bugs.b20211211;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordAllow;
4 | import com.github.houbb.sensitive.word.api.IWordDeny;
5 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
6 | import com.github.houbb.sensitive.word.support.allow.WordAllows;
7 | import com.github.houbb.sensitive.word.support.deny.WordDenys;
8 | import org.junit.Test;
9 |
10 | public class MySensitiveTest {
11 |
12 |
13 | @Test
14 | public void test() {
15 | IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDeny());
16 | IWordAllow wordAllow = WordAllows.chains(WordAllows.defaults(), new MyWordAllow());
17 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
18 | .wordAllow(wordAllow)
19 | .wordDeny(wordDeny)// any other configuration goes here
20 | .init();// init() initialises the sensitive-word dictionary
21 |
22 | final String text = "五星红旗 我的自定义敏感词尼玛";
23 | // print the result; nothing is asserted in this test
24 | System.out.println("敏感词:"+sensitiveWordBs.findAll(text).toString());
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/WordAllowDenyCombine.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.combine.allowdeny;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordContext;
4 |
5 | import java.util.Collection;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Set;
9 |
10 | /** Combines allow and deny lists: the effective deny collection is every deny word that is not in the allow list.
11 | * @author d
12 | * @since 0.8.0
13 | */
14 | public class WordAllowDenyCombine extends AbstractWordAllowDenyCombine{
15 |
16 | @Override
17 | protected Collection doGetActualDenyList(List allowList,
18 | List denyList,
19 | IWordContext context) {
20 | Set resultSet = new HashSet<>(denyList.size());
21 |
22 | // hash set so that each allow-list membership test is O(1)
23 | Set allowSet = new HashSet<>(allowList);
24 |
25 | for(String deny : denyList) {
26 | if(allowSet.contains(deny)) {
27 | continue;
28 | }
29 |
30 | resultSet.add(deny);
31 | }
32 | return resultSet;
33 | }
34 |
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsIpv4Test.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * Tests for IPv4 address detection.
10 | */
11 | public class SensitiveWordBsIpv4Test {
12 |
13 | /**
14 | * IPv4 address: with the default configuration nothing is reported.
15 | * @since 0.17.0
16 | */
17 | @Test
18 | public void defaultTest() {
19 | final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。";
20 | final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init();
21 |
22 | List wordList = sensitiveWordBs.findAll(text);
23 | Assert.assertEquals("[]", wordList.toString());
24 | }
25 |
26 | /**
27 | * IPv4 address: reported once enableIpv4Check(true) is set.
28 | * @since 0.17.0
29 | */
30 | @Test
31 | public void ipv4CheckTest() {
32 | final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。";
33 | final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().enableIpv4Check(true).init();
34 | List wordList = sensitiveWordBs.findAll(text);
35 | Assert.assertEquals("[127.0.0.1]", wordList.toString());
36 | }
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * project: sensitive-word-SensitiveWordBsTest
10 | * create on 2020/1/7 23:43
11 | *
12 | * @author Administrator
13 | * @since 0.2.1
14 | */
15 | public class SensitiveWordBsNumLenTest {
16 |
17 | /**
18 | * Finds all sensitive words with number checking enabled, for the default and for a custom number length.
19 | * @since 0.2.1
20 | */
21 | @Test
22 | public void findAllTest() {
23 | final String text = "你懂得:12345678";
24 |
25 | // by default 8 digits are detected
26 | List wordList = SensitiveWordBs.newInstance()
27 | .enableNumCheck(true)
28 | .init().findAll(text);
29 | Assert.assertEquals("[12345678]", wordList.toString());
30 |
31 | // set the number length explicitly to avoid false positives: with 9, this 8-digit number is not reported
32 | List wordList2 = SensitiveWordBs.newInstance()
33 | .enableNumCheck(true)
34 | .numCheckLen(9)
35 | .init()
36 | .findAll(text);
37 | Assert.assertEquals("[]", wordList2.toString());
38 | }
39 |
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bugs/b118/Bug118Test.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bugs.b118;
2 |
3 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
4 | import com.github.houbb.sensitive.word.support.check.WordChecks;
5 | import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
6 | import org.junit.Assert;
7 | import org.junit.Test;
8 |
9 | /**
10 | * Regression test (bug 118, per the package name): number detection combined with ignoring special characters.
11 | */
12 | public class Bug118Test {
13 |
14 | @Test
15 | public void test() {
16 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
17 | .charIgnore(SensitiveWordCharIgnores.specialChars())
18 | .wordCheckNum(WordChecks.num())
19 | .numCheckLen(8)
20 | .enableNumCheck(true)
21 | .init();
22 |
23 | // JUnit's signature is assertEquals(expected, actual); keeping that order makes failure messages read correctly.
24 | Assert.assertEquals("1234567===0001", sensitiveWordBs.findFirst("1234567===0001哈哈哈"));
25 | Assert.assertEquals("12345670002", sensitiveWordBs.findFirst("12345670002 哈哈哈"));
26 | Assert.assertEquals("=====123456====70002", sensitiveWordBs.findFirst("=====123456====70002 哈哈哈"));
27 | // the letter X breaks the digit run, so no match is expected
28 | Assert.assertNull(sensitiveWordBs.findFirst("=====123456====X70002 哈哈哈"));
29 | }
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/tag/FileWordTag.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.tag;
2 |
3 | import com.github.houbb.heaven.util.common.ArgUtil;
4 | import com.github.houbb.heaven.util.io.FileUtil;
5 | import com.github.houbb.sensitive.word.api.IWordTag;
6 |
7 | import java.util.List;
8 | import java.util.Set;
9 |
10 | /**
11 | * File-based word tags.
12 | *
13 | * Default line format: word tag1,tag2
14 | * @since 0.10.0
15 | */
16 | public class FileWordTag extends AbstractWordTag {
17 |
18 | /**
19 | * Delegate built from the file's lines; tag lookups are forwarded to it.
20 | */
21 | protected final IWordTag wordTag;
22 |
23 | public FileWordTag(String filePath) {
24 | this(filePath, " ", ","); // defaults: a space between word and tags, a comma between tags
25 | }
26 |
27 | public FileWordTag(String filePath, String wordSplit, String tagSplit) {
28 | ArgUtil.notEmpty(filePath, "filePath");
29 | ArgUtil.notEmpty(wordSplit, "wordSplit");
30 | ArgUtil.notEmpty(tagSplit, "tagSplit");
31 |
32 | List lines = FileUtil.readAllLines(filePath); // the file is read once, in the constructor
33 | wordTag = WordTags.lines(lines, wordSplit, tagSplit);
34 | }
35 |
36 | @Override
37 | protected Set doGetTag(String word) {
38 | return wordTag.getTag(word);
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/data/DictRemoveSingleTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.data;
2 |
3 | import com.github.houbb.heaven.util.io.FileUtil;
4 | import org.junit.Ignore;
5 | import org.junit.Test;
6 |
7 | import java.util.List;
8 |
9 | /**
10 | * Dictionary data preparation.
11 | * @author binbin.hou
12 | * @since 0.9.0
13 | */
14 | @Ignore
15 | public class DictRemoveSingleTest {
16 |
17 | /**
18 | * Removes single-character words from the dictionary.
19 | *
20 | * 1. Reads every line of the source file.
21 | * 2. Trims each line.
22 | * 3. Appends the trimmed word to the target file only when it is longer than one character.
23 | * NOTE(review): the previous comment described a format-normalisation routine and did not match this method.
24 | * @since 0.0.3
25 | */
26 | @Test
27 | @Ignore
28 | public void removeSingleWord() {
29 | final String sourceFile = "D:\\code\\github\\sensitive-word\\src\\test\\resources\\dict_20231117.txt";
30 | final String targetFile = "D:\\code\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
31 |
32 | List words = FileUtil.readAllLines(sourceFile);
33 |
34 | for(String word : words) {
35 | String wordTrim = word.trim();
36 | if(wordTrim.length() > 1) {
37 | FileUtil.append(targetFile, wordTrim);
38 | }
39 | }
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/data/DictRemoveTwoEnglishTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.data;
2 |
3 | import com.github.houbb.heaven.util.io.FileUtil;
4 | import com.github.houbb.heaven.util.lang.CharUtil;
5 | import com.github.houbb.heaven.util.lang.StringUtil;
6 | import org.junit.Ignore;
7 | import org.junit.Test;
8 |
9 | import java.util.List;
10 |
11 | /**
12 | * Dictionary data preparation: copies the dictionary while leaving out two-character English words.
13 | * @author binbin.hou
14 | * @since 0.9.0
15 | */
16 | @Ignore
17 | public class DictRemoveTwoEnglishTest {
18 |
19 | public static void main(String[] args) {
20 | final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
21 | final String targetFile = "D:\\github\\sensitive-word\\src\\test\\resources\\dict_v20240407.txt";
22 |
23 | List words = FileUtil.readAllLines(sourceFile);
24 |
25 | for(String word : words) {
26 | String wordTrim = word.trim();
27 | // two-character English words are printed and skipped; every other word is appended to the target file
28 | if(wordTrim.length() == 2 && StringUtil.isEnglish(wordTrim)) {
29 | System.out.println(word);
30 | } else {
31 | FileUtil.append(targetFile, wordTrim);
32 | }
33 | }
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatArray.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.util.common.ArgUtil;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordFormat;
6 |
7 | import java.util.List;
8 |
9 | /**
10 | * Applies a list of formats one after another; the list is copied into an array for direct indexed calls.
11 | * @author binbin.hou
12 | * @since 0.30.0
13 | */
14 | public class WordFormatArray implements IWordFormat {
15 |
16 | private final IWordFormat[] wordFormats;
17 | private final int size;
18 | public WordFormatArray(List wordFormats) {
19 | ArgUtil.notEmpty(wordFormats, "wordFormats");
20 |
21 | this.size = wordFormats.size();
22 | this.wordFormats = new IWordFormat[size];
23 | for(int i = 0; i < size; i++) {
24 | this.wordFormats[i] = wordFormats.get(i);
25 | }
26 | }
27 |
28 | @Override
29 | public char format(char original, IWordContext context) {
30 | char c = original;
31 | for(int i = 0; i < size; i++) {
32 | IWordFormat charFormat = wordFormats[i];
33 | c = charFormat.format(c, context); // each format receives the previous format's output
34 | }
35 |
36 | return c;
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaceChar.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.replace;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.constant.CharConst;
5 | import com.github.houbb.sensitive.word.api.IWordReplace;
6 | import com.github.houbb.sensitive.word.api.IWordContext;
7 | import com.github.houbb.sensitive.word.api.IWordResult;
8 |
9 | /**
10 | * 指定字符的替换策略
11 | * @author binbin.hou
12 | * @since 0.2.0
13 | */
14 | @ThreadSafe
15 | public class WordReplaceChar implements IWordReplace {
16 |
17 | /**
18 | * 替换的字符
19 | * @since 0.3.0
20 | */
21 | private final char replaceChar;
22 |
23 | public WordReplaceChar(char replaceChar) {
24 | this.replaceChar = replaceChar;
25 | }
26 |
27 | public WordReplaceChar() {
28 | this(CharConst.STAR);
29 | }
30 |
31 | @Override
32 | public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) {
33 | int wordLen = wordResult.endIndex() - wordResult.startIndex();
34 | for(int i = 0; i < wordLen; i++) {
35 | stringBuilder.append(replaceChar);
36 | }
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.utils;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordResult;
4 |
5 | /**
6 | * project: sensitive-word-NumUtils
7 | * create on 2020/1/8 22:18
8 | *
9 | * @author Administrator
10 | * @since 0.0.4
11 | */
12 | public final class InnerWordCharUtils {
13 |
14 | private InnerWordCharUtils() {
15 | }
16 |
17 | /**
18 | * 构建字符串
19 | * @param text 字符串
20 | * @param startIndex 开始位置
21 | * @param endIndex 结束位置
22 | * @return 结果
23 | * @since 0.29.0
24 | */
25 | public static String getString(final String text,
26 | final int startIndex,
27 | final int endIndex) {
28 | return text.substring(startIndex, endIndex);
29 | }
30 | /**
31 | * 构建字符串
32 | * @param text 字符串
33 | * @param wordResult 结果
34 | * @return 结果
35 | * @since 0.29.0
36 | */
37 | public static String getString(final String text,
38 | final IWordResult wordResult) {
39 | return getString(text, wordResult.startIndex(), wordResult.endIndex());
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/replace/MyWordReplace.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.replace;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordReplace;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordResult;
6 | import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
7 |
8 | /**
9 | * 自定义敏感词替换策略
10 | *
11 | * @author binbin.hou
12 | * @since 0.2.0
13 | */
14 | public class MyWordReplace implements IWordReplace {
15 |
16 | @Override
17 | public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) {
18 | String sensitiveWord = InnerWordCharUtils.getString(rawText, wordResult);
19 | // 自定义不同的敏感词替换策略,可以从数据库等地方读取
20 | if("五星红旗".equals(sensitiveWord)) {
21 | stringBuilder.append("国家旗帜");
22 | } else if("毛主席".equals(sensitiveWord)) {
23 | stringBuilder.append("教员");
24 | } else {
25 | // 其他默认使用 * 代替
26 | int wordLength = wordResult.endIndex() - wordResult.startIndex();
27 | for(int i = 0; i < wordLength; i++) {
28 | stringBuilder.append('*');
29 | }
30 | }
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * project: sensitive-word-SensitiveWordBsTest
10 | * create on 2020/1/7 23:43
11 | *
12 | * @author Administrator
13 | * @since 0.0.5
14 | */
15 | public class SensitiveWordBsNumTest {
16 |
17 | /**
18 | * 返回所有敏感词
19 | * @since 0.0.5
20 | */
21 | @Test
22 | public void findAllTest() {
23 | final String text = "这个是我的微信:9989123456";
24 |
25 | List wordList = SensitiveWordBs.newInstance()
26 | .enableNumCheck(true)
27 | .init().findAll(text);
28 | Assert.assertEquals("[9989123456]", wordList.toString());
29 | }
30 |
31 | /**
32 | * 返回所有敏感词
33 | * @since 0.0.5
34 | */
35 | @Test
36 | public void ignoreNumStyleTest() {
37 | final String text = "这个是我的微信:9⓿二肆⁹₈③⑸⒋➃㈤㊄";
38 |
39 | List wordList = SensitiveWordBs.newInstance()
40 | .enableNumCheck(true)
41 | .init().findAll(text);
42 | Assert.assertEquals("[9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString());
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/data/DictRemoveCommonITUsageTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.data;
2 |
3 | import com.github.houbb.heaven.util.io.FileUtil;
4 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
5 | import org.junit.Ignore;
6 | import org.junit.Test;
7 |
8 | import java.io.File;
9 | import java.util.HashSet;
10 | import java.util.List;
11 | import java.util.Set;
12 |
13 | /**
14 | * 常用的 it 使用
15 | *
16 | * @author binbin.hou
17 | * @since 0.14.1
18 | */
19 | @Ignore
20 | public class DictRemoveCommonITUsageTest {
21 |
22 | /**
23 | * 统计自己的文章,移除常用的 it 用语等。降低误判率
24 | */
25 | @Test
26 | @Ignore
27 | public void removeSingleWord() {
28 | final String dir = "D:\\github\\houbb.github.io\\_posts";
29 |
30 | File[] files = new File(dir).listFiles();
31 |
32 | // 默认策略
33 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init();
34 |
35 | Set allWords = new HashSet<>();
36 | for(File file : files) {
37 | String content = FileUtil.getFileContent(file);
38 |
39 | List words = sensitiveWordBs.findAll(content);
40 | allWords.addAll(words);
41 | }
42 |
43 | System.out.println(allWords);
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import com.github.houbb.sensitive.word.support.check.WordChecks;
4 | import org.junit.Assert;
5 | import org.junit.Test;
6 |
7 | import java.util.List;
8 |
9 | /**
10 | * project: sensitive-word-SensitiveWordBsTest
11 | * create on 2020/1/7 23:43
12 | *
13 | * @author Administrator
14 | * @since 0.25.0
15 | */
16 | public class SensitiveWordBsUrlNoPrefixTest {
17 |
18 | /**
19 | * URL 检测
20 | *
21 | * @since 0.25.0
22 | */
23 | @Test
24 | public void urlNoPrefixTest() {
25 | final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com";
26 |
27 | final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
28 | .enableUrlCheck(true) // 启用URL检测
29 | .wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式
30 | .init();
31 | List wordList = sensitiveWordBs.findAll(text);
32 | Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString());
33 |
34 | Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text));
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.combine.check;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordCheck;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.support.check.WordChecks;
6 |
7 | import java.util.ArrayList;
8 | import java.util.List;
9 |
10 | /**
11 | * @author d
12 | * @since 0.8.0
13 | */
14 | public class WordCheckCombine extends AbstractWordCheckCombine {
15 |
16 | @Override
17 | protected List getWordCheckList(IWordContext context) {
18 | List wordCheckList = new ArrayList<>();
19 |
20 | if(context.enableWordCheck()) {
21 | wordCheckList.add(context.wordCheckWord());
22 | }
23 | if(context.enableNumCheck()) {
24 | wordCheckList.add(context.wordCheckNum());
25 | }
26 | if(context.enableEmailCheck()) {
27 | wordCheckList.add(context.wordCheckEmail());
28 | }
29 | if(context.enableUrlCheck()) {
30 | wordCheckList.add(context.wordCheckUrl());
31 | }
32 | if(context.enableIpv4Check()) {
33 | wordCheckList.add(context.wordCheckIpv4());
34 | }
35 |
36 | return wordCheckList;
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.utils;
2 |
3 | import com.github.houbb.heaven.util.lang.StringUtil;
4 | import com.github.houbb.heaven.util.util.CollectionUtil;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 | import com.github.houbb.sensitive.word.api.IWordTag;
7 |
8 | import java.util.Collections;
9 | import java.util.Set;
10 |
11 | /**
12 | * 内部的单词标签工具类
13 | *
14 | * @since 0.24.0
15 | */
16 | public class InnerWordTagUtils {
17 |
18 | /**
19 | * 获取敏感词的标签
20 | *
21 | * @param word 敏感词
22 | * @param wordContext 上下文
23 | * @return 结果
24 | * @since 0.24.0
25 | */
26 | public static Set tags(final String word,
27 | final IWordContext wordContext) {
28 | if(StringUtil.isEmpty(word)) {
29 | return null;
30 | }
31 |
32 | final IWordTag wordTag = wordContext.wordTag();
33 | // 直接获取
34 | Set actualSet = wordTag.getTag(word);
35 | if(CollectionUtil.isNotEmpty(actualSet)) {
36 | return actualSet;
37 | }
38 |
39 | // 格式化处理后的信息
40 | String formatWord = InnerWordFormatUtils.format(word, wordContext);
41 | return wordContext.wordTag().getTag(formatWord);
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/mapping/WordFormatTextDefault.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format.mapping;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordContext;
4 | import com.github.houbb.sensitive.word.api.IWordFormat;
5 | import com.github.houbb.sensitive.word.support.format.WordFormatNone;
6 |
7 | import java.util.Collections;
8 | import java.util.HashMap;
9 | import java.util.Map;
10 |
11 | /**
12 | * 默认实现
13 | *
14 | * @author d
15 | * @since 0.28.0
16 | */
17 | public class WordFormatTextDefault extends AbstractWordFormatText {
18 |
19 | @Override
20 | protected Map doFormat(String text, IWordContext context) {
21 | // 单个字符串里信息
22 | final IWordFormat wordFormat = context.wordFormat();
23 | // 不需要处理的场景
24 | if(wordFormat.getClass().getName().equals(WordFormatNone.class.getName())) {
25 | return Collections.emptyMap();
26 | }
27 |
28 | //v0.29.2
29 | Map map = new HashMap<>();
30 | for(int i = 0; i < text.length(); i++) {
31 | char c = text.charAt(i);
32 | char mc = wordFormat.format(c, context);
33 |
34 | if(c != mc) {
35 | map.put(c, mc);
36 | }
37 | }
38 | return map;
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/warmup/WordWarmUpDefault.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.warmup;
2 |
3 | import com.github.houbb.heaven.util.util.CollectionUtil;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordWarmUp;
6 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
7 |
8 | import java.util.List;
9 |
10 | /**
11 | * 默认策略
12 | * @since 1.0.0
13 | */
14 | public class WordWarmUpDefault implements IWordWarmUp {
15 |
16 | @Override
17 | public void warmUp(SensitiveWordBs sensitiveWordBs, IWordContext wordContext, List wordAllowList, List wordDenyList) {
18 | String testInfo = "sensitive-word";
19 | if(CollectionUtil.isNotEmpty(wordAllowList)) {
20 | testInfo = testInfo + " " + wordAllowList.get(0);
21 | }
22 | if(CollectionUtil.isNotEmpty(wordDenyList)) {
23 | testInfo = testInfo + " " + wordDenyList.get(0);
24 | }
25 |
26 | // 只能说优化,但是无法杜绝
27 | for(int i = 0; i < 5; i++) {
28 | sensitiveWordBs.findAll(testInfo);
29 | sensitiveWordBs.findFirst(testInfo);
30 | sensitiveWordBs.contains(testInfo);
31 | sensitiveWordBs.replace(testInfo);
32 | sensitiveWordBs.tags(testInfo);
33 | }
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/spring/SpringSensitiveWordConfig.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.spring;
2 |
3 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
4 | import com.github.houbb.sensitive.word.spring.annotation.Autowired;
5 | import com.github.houbb.sensitive.word.spring.annotation.Bean;
6 | import com.github.houbb.sensitive.word.spring.annotation.Configuration;
7 | import com.github.houbb.sensitive.word.spring.database.MyDdWordAllow;
8 | import com.github.houbb.sensitive.word.spring.database.MyDdWordDeny;
9 | import com.github.houbb.sensitive.word.support.allow.WordAllows;
10 |
11 | /**
12 | * @author binbin.hou
13 | * @since 1.0.0
14 | */
15 | @Configuration
16 | public class SpringSensitiveWordConfig {
17 |
18 | @Autowired
19 | private MyDdWordAllow myDdWordAllow;
20 |
21 | @Autowired
22 | private MyDdWordDeny myDdWordDeny;
23 |
24 | /**
25 | * 初始化引导类
26 | * @return 初始化引导类
27 | * @since 1.0.0
28 | */
29 | @Bean
30 | public SensitiveWordBs sensitiveWordBs() {
31 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
32 | .wordAllow(WordAllows.chains(WordAllows.defaults(), myDdWordAllow))
33 | .wordDeny(myDdWordDeny)
34 | // 各种其他配置
35 | .init();
36 |
37 | return sensitiveWordBs;
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/combine/format/WordFormatCombine.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.combine.format;
2 |
3 | import com.github.houbb.heaven.util.guava.Guavas;
4 | import com.github.houbb.sensitive.word.api.IWordFormat;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 | import com.github.houbb.sensitive.word.support.format.WordFormats;
7 |
8 | import java.util.List;
9 |
10 | /**
11 | * @author d
12 | * @since 0.8.0
13 | */
14 | public class WordFormatCombine extends AbstractWordFormatCombine {
15 |
16 | @Override
17 | protected List getWordFormatList(IWordContext context) {
18 | List charFormats = Guavas.newArrayList();
19 | if(context.ignoreEnglishStyle()) {
20 | charFormats.add(WordFormats.ignoreEnglishStyle());
21 | }
22 | if(context.ignoreCase()) {
23 | charFormats.add(WordFormats.ignoreCase());
24 | }
25 | if(context.ignoreWidth()) {
26 | charFormats.add(WordFormats.ignoreWidth());
27 | }
28 | if(context.ignoreNumStyle()) {
29 | charFormats.add(WordFormats.ignoreNumStyle());
30 | }
31 | if(context.ignoreChineseStyle()) {
32 | charFormats.add(WordFormats.ignoreChineseStyle());
33 | }
34 |
35 | return charFormats;
36 | }
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.utils;
2 |
/**
 * Internal character helpers.
 *
 * @since 0.17.0
 */
public class InnerCharUtils {

    /**
     * Converts a full-width character to its half-width form.
     *
     * <p>The ideographic space (U+3000) becomes a regular space and characters in
     * U+FF01..U+FF5E are shifted down by 0xFEE0. Any other character is returned as is.</p>
     *
     * @param original character to convert
     * @return the half-width character
     * @since 0.29.2
     */
    public static char toHalfWidth(char original) {
        if (original == '\u3000') {
            return ' ';
        }

        final boolean fullWidthForm = original >= '\uFF01' && original <= '\uFF5E';
        return fullWidthForm ? (char) (original - 0xFEE0) : original;
    }


    /**
     * Converts a string of decimal digits to an int.
     *
     * <p>No validation is done: each character contributes {@code c - '0'}, there is no
     * sign handling, and the arithmetic wraps on overflow. An empty string yields 0.</p>
     *
     * @param text digits to convert
     * @return the accumulated value
     * @since 1.18.0
     */
    public static int parseInt(String text) {
        final int length = text.length();

        // Left-to-right accumulation; in wrapping int arithmetic this equals the sum of
        // digit * 10^position.
        int value = 0;
        for (int idx = 0; idx < length; idx++) {
            value = value * 10 + getCharInt(text.charAt(idx));
        }
        return value;
    }

    /**
     * Returns the numeric value of a digit character, computed as {@code c - '0'}.
     *
     * @param c character to convert
     * @return the offset of {@code c} from {@code '0'}
     * @since 1.18.0
     */
    public static int getCharInt(final char c) {
        return c - '0';
    }

}
59 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api;
2 |
3 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
4 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
5 | import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
6 |
7 | import java.util.Collection;
8 |
9 | /**
10 | * 敏感词 map
11 | * @author binbin.hou
12 | * @since 0.0.1
13 | */
14 | public interface IWordData extends ISensitiveWordDestroy {
15 |
16 | /**
17 | * 初始化敏感词 map
18 | * @param collection 集合信息
19 | * @since 0.0.1
20 | */
21 | void initWordData(Collection collection);
22 |
23 | /**
24 | * 删除敏感词
25 | * @param collection 单词
26 | * @since 0.19.0
27 | */
28 | void removeWord(Collection collection);
29 |
30 | /**
31 | * 新增敏感词
32 | * @param collection 敏感词集合
33 | * @since 0.19.0
34 | */
35 | void addWord(Collection collection);
36 |
37 | /**
38 | * 是否包含敏感词
39 | * @param stringBuilder 缓冲
40 | * @param innerContext 上下文
41 | * @return 是否包含
42 | * @since 0.5.0
43 | * @see WordValidModeEnum#FAIL_FAST 建议使用快速返回模式
44 | */
45 | WordContainsTypeEnum contains(final StringBuilder stringBuilder,
46 | final InnerSensitiveWordContext innerContext);
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.result;
2 |
3 | import com.github.houbb.heaven.util.util.CollectionUtil;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordResult;
6 | import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
7 | import com.github.houbb.sensitive.word.utils.InnerWordTagUtils;
8 |
9 | import java.util.Set;
10 |
11 | /**
12 | * 单词+对应的标签信息
13 | *
14 | * @author binbin.hou
15 | * @since 0.12.0
16 | */
17 | public class WordResultHandlerWordTags extends AbstractWordResultHandler {
18 |
19 | @Override
20 | protected WordTagsDto doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
21 | WordTagsDto dto = new WordTagsDto();
22 |
23 | // 截取
24 | String word = InnerWordCharUtils.getString(originalText, wordResult);
25 |
26 | // 获取 tags (使用清理后的单词查找标签)
27 | Set wordTags = InnerWordTagUtils.tags(word, wordContext);
28 |
29 | // 如果为空,则尝试使用命中的敏感词匹配 v0.25.1 bug105
30 | if(CollectionUtil.isEmpty(wordTags)) {
31 | wordTags = InnerWordTagUtils.tags(wordResult.word(), wordContext);
32 | }
33 |
34 | dto.setWord(word);
35 | dto.setTags(wordTags);
36 |
37 | return dto;
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/tag/AbstractWordTagInit.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.tag;
2 |
3 | import com.github.houbb.heaven.support.pipeline.Pipeline;
4 | import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
5 | import com.github.houbb.heaven.util.util.CollectionUtil;
6 | import com.github.houbb.sensitive.word.api.IWordTag;
7 |
8 | import java.util.HashSet;
9 | import java.util.List;
10 | import java.util.Set;
11 |
12 | /**
13 | * 抽象的单词标签初始化引导类
14 | *
15 | * @since 0.24.0
16 | */
17 | public abstract class AbstractWordTagInit extends AbstractWordTag {
18 |
19 | /**
20 | * 初始化列表
21 | *
22 | * @param pipeline 当前列表泳道
23 | * @since 0.24.0
24 | */
25 | protected abstract void init(final Pipeline pipeline);
26 |
27 | @Override
28 | public Set doGetTag(String word) {
29 | Pipeline pipeline = new DefaultPipeline<>();
30 | this.init(pipeline);
31 |
32 | Set resultSet = new HashSet<>();
33 | List wordTagList = pipeline.list();
34 | for (IWordTag wordTag : wordTagList) {
35 | Set tempTagSet = wordTag.getTag(word);
36 | if(CollectionUtil.isNotEmpty(tempTagSet)) {
37 | resultSet.addAll(tempTagSet);
38 | }
39 | }
40 |
41 | return resultSet;
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowInit.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.allow;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.support.pipeline.Pipeline;
5 | import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
6 | import com.github.houbb.sensitive.word.api.IWordAllow;
7 |
8 | import java.util.ArrayList;
9 | import java.util.List;
10 |
11 | /**
12 | * 初始化类
13 | *
14 | * @author binbin.hou
15 | * @since 0.0.13
16 | */
17 | @ThreadSafe
18 | public abstract class WordAllowInit implements IWordAllow {
19 |
20 | /**
21 | * 初始化列表
22 | *
23 | * @param pipeline 当前列表泳道
24 | * @since 0.0.13
25 | */
26 | protected abstract void init(final Pipeline pipeline);
27 |
28 | @Override
29 | public List allow() {
30 | Pipeline pipeline = new DefaultPipeline<>();
31 | this.init(pipeline);
32 |
33 | List results = new ArrayList<>();
34 | List wordAllows = pipeline.list();
35 | for (IWordAllow wordAllow : wordAllows) {
36 | List allowList = wordAllow.allow();
37 | if (allowList == null) {
38 | allowList = new ArrayList<>();
39 | }
40 | results.addAll(allowList);
41 | }
42 |
43 | return results;
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.data;
2 |
3 | import com.github.houbb.heaven.util.io.FileUtil;
4 | import org.junit.Ignore;
5 | import org.junit.Test;
6 |
7 | import java.util.List;
8 |
9 | /**
10 | * 数据数据的格式统一化
11 | * @author binbin.hou
12 | * @since 0.0.5
13 | */
14 | @Ignore
15 | public class DictNumTest {
16 |
17 | /**
18 | * 统一格式
19 | *
20 | * 1. 将所有的大写字母统一转换为小写
21 | * 2. 将所有的全角转换为半角
22 | * 3. 移除所有【空格】【符号】(这个就是各种符号的过滤了)
23 | * 4. 繁体字统一转换为简体字
24 | * @since 0.0.3
25 | */
26 | @Test
27 | @Ignore
28 | public void formatTest() {
29 | final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
30 | final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
31 |
32 | List words = FileUtil.readAllLines(sourceFile);
33 | // List formats = CollectionUtil.toList(words, new IHandler() {
34 | // @Override
35 | // public String handle(String string) {
36 | // // 数字的格式化统一处理
37 | // return NumUtils.getMappingString(string);
38 | // }
39 | // });
40 |
41 | List resultList = DataUtil.disctinctAndSort(words);
42 | FileUtil.write(targetFile, resultList);
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/release_rm.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Re-creates a release branch: deletes release_<oldVersion> locally and on origin, creates
# and pushes release_<newVersion>, then sets the Maven version to <newVersion>-SNAPSHOT and
# commits and pushes that change.
echo "============================= RELEASE START..."

## Version information (must be set by hand)
oldVersion="1.0.0"
newVersion="1.0.0"
# NOTE(review): "sisyphus" looks carried over from another project. It is passed to
# versions:set as -DartifactId, so confirm it matches this project's artifactId.
projectName="sisyphus"

# Delete the old branch
oldBranchName="release_${oldVersion}"
git branch -d "${oldBranchName}"
git push origin --delete "${oldBranchName}"

echo "1. Branch remove success..."

# Create the new branch
newBranchName="release_${newVersion}"
git branch "${newBranchName}"
git checkout "${newBranchName}"
git push --set-upstream origin "${newBranchName}"

echo "2. NEW BRANCH DONE."

# Change the version number on the new branch
## snapshot version number
snapshot_new_version="${newVersion}-SNAPSHOT"
# The original referenced an undefined ${release_version} here and in the messages below,
# which expanded to an empty string; the old version is ${oldVersion}.
mvn versions:set -DgroupId=com.github.houbb -DartifactId="${projectName}" -DoldVersion="${oldVersion}" -DnewVersion="${snapshot_new_version}"
mvn -N versions:update-child-modules
mvn versions:commit

git add .
git commit -m "modify branch ${oldVersion} TO ${snapshot_new_version}"
git push
git status
echo "3. MODIFY ${oldVersion} TO ${snapshot_new_version} DONE."

echo "============================= BRANCH RE-CREATE END..."

echo "============================= BRANCH LIST ============================="
git branch -a

# Usage:
# WARNING: this script deletes branches -- use with care!
# 1. Make it executable: chmod +x ./release_rm.sh
# 2. Run it: ./release_rm.sh
# Last Update Time: 2018-06-21 11:10:42
# Author: houbb
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordCheck;
5 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
6 | import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
7 | import com.github.houbb.sensitive.word.support.result.WordLengthResult;
8 |
9 | /**
10 | * 未匹配
11 | *
12 | * @author binbin.hou
13 | * @since 0.3.0
14 | */
15 | @ThreadSafe
16 | public class WordCheckNone implements IWordCheck {
17 |
18 | /**
19 | * @since 0.3.0
20 | */
21 | private static final IWordCheck INSTANCE = new WordCheckNone();
22 |
23 | public static IWordCheck getInstance() {
24 | return INSTANCE;
25 | }
26 |
27 | /**
28 | * 只有一个未匹配
29 | */
30 | private static final WordCheckResult NONE_RESULT = WordCheckResult.newInstance()
31 | .type(WordTypeEnum.DEFAULTS.getCode())
32 | .wordLengthResult(WordLengthResult.newInstance())
33 | .checkClass(WordCheckNone.class);
34 |
35 | public static WordCheckResult getNoneResult() {
36 | return NONE_RESULT;
37 | }
38 |
39 | @Override
40 | public WordCheckResult sensitiveCheck(int beginIndex, InnerSensitiveWordContext context) {
41 | return NONE_RESULT;
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/data/StopWordTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.data;
2 |
3 | import com.github.houbb.heaven.support.condition.ICondition;
4 | import com.github.houbb.heaven.support.filter.IFilter;
5 | import com.github.houbb.heaven.util.io.FileUtil;
6 | import com.github.houbb.heaven.util.lang.StringUtil;
7 | import com.github.houbb.heaven.util.util.CharsetUtil;
8 | import com.github.houbb.heaven.util.util.CollectionUtil;
9 | import org.junit.Ignore;
10 | import org.junit.Test;
11 |
12 | import java.util.Collections;
13 | import java.util.List;
14 |
15 | /**
16 | * 停止词数据初始化
17 | * @author binbin.hou
18 | * @since 0.0.3
19 | */
20 | @Ignore
21 | public class StopWordTest {
22 |
23 | /**
24 | * 中文测试
25 | * @since 0.0.3
26 | */
27 | @Test
28 | @Ignore
29 | public void zhTest() {
30 | final String sourceFile = "stopword.txt";
31 | final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\stopword_zh.txt";
32 |
33 | List allLines = DataUtil.distinctLines(sourceFile);
34 |
35 | List zhLines = CollectionUtil.conditionList(allLines, new ICondition() {
36 | @Override
37 | public boolean condition(String s) {
38 | return CharsetUtil.isAllChinese(s);
39 | }
40 | });
41 |
42 | FileUtil.write(targetFile, zhLines);
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api;
2 |
3 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
4 |
5 | import java.util.List;
6 |
7 | /**
8 | * 核心方法
9 | * @since 0.3.2
10 | */
11 | public interface ISensitiveWord {
12 |
13 | /**
14 | * 返回所有对应的敏感词
15 | * @param string 原始字符串
16 | * @param context 上下文
17 | * @return 结果
18 | * @since 0.0.1
19 | * @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
20 | */
21 | List findAll(final String string,
22 | final IWordContext context);
23 |
24 | /**
25 | * 返回第一个对应的敏感词
26 | * @param string 原始字符串
27 | * @param context 上下文
28 | * @return 结果
29 | * @since 0.3.2
30 | */
31 | IWordResult findFirst(final String string,
32 | final IWordContext context);
33 |
34 | /**
35 | * 替换所有敏感词内容
36 | *
37 | * ps: 这里可以添加优化。
38 | *
39 | * @param target 目标字符串
40 | * @param context 上下文
41 | * @return 替换后结果
42 | * @since 0.3.2
43 | */
44 | String replace(final String target,
45 | final IWordContext context);
46 |
47 | /**
48 | * 包含
49 | * @param string 字符串
50 | * @param context 上下文
51 | * @return 结果
52 | * @since 0.3.2
53 | */
54 | boolean contains(final String string,
55 | final IWordContext context);
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenyInit.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.deny;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.support.pipeline.Pipeline;
5 | import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
6 | import com.github.houbb.heaven.util.io.StreamUtil;
7 | import com.github.houbb.sensitive.word.api.IWordDeny;
8 |
9 | import java.util.ArrayList;
10 | import java.util.List;
11 |
12 | /**
13 | * Initialization base class: merges the deny lists of every {@link IWordDeny} that {@link #init(Pipeline)} adds to the pipeline.
14 | *
15 | * @author binbin.hou
16 | * @since 0.0.13
17 | */
18 | @ThreadSafe
19 | public abstract class WordDenyInit implements IWordDeny {
20 |
21 | /**
22 | * Initializes the list.
23 | *
24 | * @param pipeline the pipeline holding the current list
25 | * @since 0.0.13
26 | */
27 | protected abstract void init(final Pipeline pipeline);
28 |
29 | @Override
30 | public List deny() {
31 | Pipeline pipeline = new DefaultPipeline<>(); // a fresh pipeline is built on every call
32 | this.init(pipeline);
33 |
34 | List results = new ArrayList<>();
35 | List wordDenies = pipeline.list();
36 | for (IWordDeny wordDeny : wordDenies) {
37 | List denyList = wordDeny.deny();
38 | if (denyList == null) { // a null deny list is treated as empty
39 | denyList = new ArrayList<>();
40 | }
41 | results.addAll(denyList);
42 | }
43 |
44 | return results;
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/memory/DataMemoryTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.memory;
2 |
3 | import com.github.houbb.heaven.util.io.StreamUtil;
4 | import com.github.houbb.sensitive.word.api.IWordData;
5 | import com.github.houbb.sensitive.word.support.data.WordDatas;
6 | import org.apache.lucene.util.RamUsageEstimator;
7 | import org.junit.Ignore;
8 | import org.junit.Test;
9 |
10 | import java.util.List;
11 |
12 | /**
13 | * Data memory usage test.
14 | *
15 | * @since 0.7.0
16 | */
17 | @Ignore
18 | public class DataMemoryTest {
19 |
20 | /**
21 | * 35.5 MB
22 | */
23 | @Test
24 | public void hashMapTest() {
25 | List allLines = StreamUtil.readAllLines("/sensitive_word_dict.txt");
26 | IWordData wordData = WordDatas.defaults();
27 |
28 | wordData.initWordData(allLines);
29 |
30 | // Compute the combined size of the given object and every object in its reference tree, as a human-readable value, e.g. 2KB
31 | String humanSize = RamUsageEstimator.humanSizeOf(wordData);
32 | System.out.println(humanSize);
33 | }
34 |
35 |
36 | //33.4 MB
37 | @Test
38 | public void treeTest() {
39 | List allLines = StreamUtil.readAllLines("/sensitive_word_dict.txt");
40 | IWordData wordData = WordDatas.tree();
41 |
42 | wordData.initWordData(allLines);
43 |
44 | // Compute the combined size of the given object and every object in its reference tree, as a human-readable value, e.g. 2KB
45 | String humanSize = RamUsageEstimator.humanSizeOf(wordData);
46 | System.out.println(humanSize);
47 | }
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/benchmark/CharUtilPerfTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.benchmark;
2 |
3 | import com.github.houbb.heaven.util.lang.CharUtil;
4 | import com.github.houbb.sensitive.word.utils.InnerCharUtils;
5 |
6 | public class CharUtilPerfTest {
7 |
8 |
9 | private static final int COUNT = 10_00_000;
10 |
11 | public static void main(String[] args) {
12 | char[] testData = new char[COUNT];
13 | for (int i = 0; i < COUNT; i++) {
14 | testData[i] = (char) ('A' + (i % 52)); // A-Z a-z intended; NOTE(review): this actually yields the 52 consecutive chars 'A'..'t'
15 | }
16 |
17 | // Test the new lower-case conversion
18 | // Test the original half-width conversion
19 | char[] fullWidthData = new char[COUNT];
20 | for (int i = 0; i < COUNT; i++) {
21 | fullWidthData[i] = (char) ('\uFF01' + (i % 94)); // common full-width characters
22 | }
23 |
24 | long t5 = System.currentTimeMillis();
25 | char sum3 = 0;
26 | for (char c : fullWidthData) {
27 | sum3 += CharUtil.toHalfWidth(c);
28 | }
29 | long t6 = System.currentTimeMillis();
30 | System.out.println("原始 toHalfWidth 耗时: " + (t6 - t5) + "ms, sum=" + sum3);
31 |
32 | // Test the new half-width conversion
33 | long t7 = System.currentTimeMillis();
34 | char sum4 = 0;
35 | for (char c : fullWidthData) {
36 | sum4 += InnerCharUtils.toHalfWidth(c);
37 | }
38 | long t8 = System.currentTimeMillis();
39 | System.out.println("优化 toHalfWidth 耗时: " + (t8 - t7) + "ms, sum=" + sum4);
40 | }
41 |
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNum.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordCheck;
5 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
6 | import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
7 |
8 | /**
9 | * Sensitive word check implementation for digit sequences.
10 | *
11 | * A common parent class could be provided here.
12 | * @author binbin.hou
13 | * @since 0.0.5
14 | */
15 | @ThreadSafe
16 | public class WordCheckNum extends AbstractConditionWordCheck {
17 |
18 | /**
19 | * @since 0.3.0
20 | */
21 | private static final IWordCheck INSTANCE = new WordCheckNum();
22 |
23 | public static IWordCheck getInstance() {
24 | return INSTANCE;
25 | }
26 |
27 | @Override
28 | protected Class extends IWordCheck> getSensitiveCheckClass() {
29 | return WordCheckNum.class;
30 | }
31 |
32 | @Override
33 | protected String getType() {
34 | return WordTypeEnum.NUM.getCode();
35 | }
36 |
37 | @Override
38 | protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
39 | return Character.isDigit(mappingChar); // only digit characters satisfy the per-char condition
40 | }
41 |
42 | @Override
43 | protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) {
44 | int bufferLen = stringBuilder.length();
45 | return bufferLen >= checkContext.wordContext().sensitiveCheckNumLen(); // buffer must reach the configured number length
46 | }
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditionInit.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.heaven.support.pipeline.Pipeline;
4 | import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 | import com.github.houbb.sensitive.word.api.IWordResult;
7 | import com.github.houbb.sensitive.word.api.IWordResultCondition;
8 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
9 |
10 | import java.util.List;
11 |
12 | /**
13 | * Initialization base class for result conditions: a result matches only if every condition added by {@link #init(Pipeline)} matches.
14 | *
15 | * @since 0.23.0
16 | */
17 | public abstract class WordResultConditionInit extends AbstractWordResultCondition {
18 |
19 | /**
20 | * Initializes the list.
21 | *
22 | * @param pipeline the pipeline holding the current list
23 | * @since 0.0.13
24 | */
25 | protected abstract void init(final Pipeline pipeline);
26 |
27 | @Override
28 | protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
29 | Pipeline pipeline = new DefaultPipeline<>(); // a fresh pipeline is built on every call
30 | this.init(pipeline);
31 | List conditionList = pipeline.list();
32 |
33 | // All conditions must be satisfied
34 | for(IWordResultCondition wordResultCondition : conditionList) {
35 | if(!wordResultCondition.match(wordResult, text, modeEnum, context)) {
36 | return false;
37 | }
38 | }
39 |
40 | return true;
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.deny;
2 |
3 | import com.github.houbb.heaven.support.pipeline.Pipeline;
4 | import com.github.houbb.heaven.util.util.ArrayUtil;
5 | import com.github.houbb.sensitive.word.api.IWordDeny;
6 |
7 | /**
8 | * Factory for all deny-list implementations.
9 | * @author binbin.hou
10 | * @since 0.0.13
11 | */
12 | public final class WordDenys {
13 |
14 | private WordDenys(){}
15 |
16 | /**
17 | * Chain of responsibility.
18 | * @param wordDeny the deny list
19 | * @param others the other deny lists
20 | * @return the result
21 | * @since 0.0.13
22 | */
23 | public static IWordDeny chains(final IWordDeny wordDeny,
24 | final IWordDeny... others) {
25 | return new WordDenyInit() {
26 | @Override
27 | protected void init(Pipeline pipeline) {
28 | pipeline.addLast(wordDeny); // the first deny list is always added
29 |
30 | if(ArrayUtil.isNotEmpty(others)) {
31 | for(IWordDeny other : others) {
32 | pipeline.addLast(other);
33 | }
34 | }
35 | }
36 | };
37 | }
38 |
39 | /**
40 | * System implementation.
41 | * @return the result
42 | * @since 0.0.13
43 | */
44 | public static IWordDeny defaults() {
45 | return WordDenySystem.getInstance();
46 | }
47 |
48 | /**
49 | * Empty implementation.
50 | * @return the result
51 | * @since 0.19.13
52 | */
53 | public static IWordDeny empty() {
54 | return new WordDenyEmpty();
55 | }
56 |
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/define/SensitiveWordBsDefineTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.define;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordAllow;
4 | import com.github.houbb.sensitive.word.api.IWordDeny;
5 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
6 | import com.github.houbb.sensitive.word.support.allow.WordAllows;
7 | import com.github.houbb.sensitive.word.support.deny.WordDenys;
8 | import org.junit.Assert;
9 | import org.junit.Test;
10 |
11 | /** Tests for user-defined deny/allow word lists.
12 | * @author binbin.hou
13 | * @since 1.0.0
14 | */
15 | public class SensitiveWordBsDefineTest {
16 |
17 | @Test
18 | public void defineDenyTest() {
19 | String text = "这是一个测试,我的自定义敏感词。";
20 |
21 | SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
22 | .wordDeny(new MyWordDeny()) // custom deny list only
23 | .wordAllow(new MyWordAllow()) // custom allow list only
24 | .init();
25 |
26 | Assert.assertEquals("[我的自定义敏感词]", wordBs.findAll(text).toString());
27 | }
28 |
29 | @Test
30 | public void defineChainsTest() {
31 | String text = "这是一个测试。我的自定义敏感词。";
32 |
33 | IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDeny()); // system deny list chained with the custom one
34 | IWordAllow wordAllow = WordAllows.chains(WordAllows.defaults(), new MyWordAllow()); // system allow list chained with the custom one
35 |
36 | SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
37 | .wordDeny(wordDeny)
38 | .wordAllow(wordAllow)
39 | .init();
40 |
41 | Assert.assertEquals("[我的自定义敏感词]", wordBs.findAll(text).toString());
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.allow;
2 |
3 | import com.github.houbb.heaven.support.pipeline.Pipeline;
4 | import com.github.houbb.heaven.util.util.ArrayUtil;
5 | import com.github.houbb.sensitive.word.api.IWordAllow;
6 |
7 | /**
8 | * Factory for all allow-list implementations.
9 | * @author binbin.hou
10 | * @since 0.0.13
11 | */
12 | public final class WordAllows {
13 |
14 | private WordAllows(){}
15 |
16 | /**
17 | * Chain of responsibility.
18 | * @param wordAllow the allow list
19 | * @param others the other allow lists
20 | * @return the result
21 | * @since 0.0.13
22 | */
23 | public static IWordAllow chains(final IWordAllow wordAllow,
24 | final IWordAllow... others) {
25 | return new WordAllowInit() {
26 | @Override
27 | protected void init(Pipeline pipeline) {
28 | pipeline.addLast(wordAllow); // the first allow list is always added
29 |
30 | if(ArrayUtil.isNotEmpty(others)) {
31 | for(IWordAllow other : others) {
32 | pipeline.addLast(other);
33 | }
34 | }
35 | }
36 | };
37 | }
38 |
39 | /**
40 | * System implementation.
41 | * @return the result
42 | * @since 0.0.13
43 | */
44 | public static IWordAllow defaults() {
45 | return WordAllowSystem.getInstance();
46 | }
47 |
48 |
49 | /**
50 | * Empty implementation, usable for tests.
51 | * @return the result
52 | * @since 0.19.0
53 | */
54 | public static IWordAllow empty() {
55 | return new WordAllowEmpty();
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * project: sensitive-word-SensitiveWordBsTest
10 | * create on 2020/1/7 23:43
11 | *
12 | * @author Administrator
13 | * @since 0.0.9
14 | */
15 | public class SensitiveWordBsEmailTest {
16 |
17 | /**
18 | * Email test (address with an alphabetic local part).
19 | * @since 0.0.9
20 | */
21 | @Test
22 | public void emailEnglishTest() {
23 | final String text = "楼主好人,邮箱 sensitiveword@xx.com";
24 |
25 | List wordList = SensitiveWordBs.newInstance()
26 | .enableEmailCheck(true)
27 | .init()
28 | .findAll(text);
29 | Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString());
30 | }
31 |
32 | /**
33 | * Email test (address with a numeric local part).
34 | * @since 0.0.9
35 | */
36 | @Test
37 | public void emailNumberTest() {
38 | final String text = "楼主好人,邮箱 123456789@xx.com";
39 |
40 | List wordList = SensitiveWordBs.newInstance()
41 | .enableEmailCheck(true)
42 | .init()
43 | .findAll(text);
44 | Assert.assertEquals("[123456789@xx.com]", wordList.toString());
45 | }
46 |
47 | @Test
48 | public void emailTest() {
49 | final String text = "你我.他你";
50 | List wordList = SensitiveWordBs.newInstance().init().findAll(text); // default settings: nothing is expected to match
51 | Assert.assertEquals("[]", wordList.toString());
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/data/DataUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.data;
2 |
3 | import com.github.houbb.heaven.util.io.FileUtil;
4 | import com.github.houbb.heaven.util.util.CollectionUtil;
5 | import org.junit.Ignore;
6 | import org.junit.Test;
7 |
8 | import java.util.Collection;
9 | import java.util.Collections;
10 | import java.util.List;
11 |
12 | /**
13 | * @author binbin.hou
14 | * @since 0.0.3
15 | */
16 | public class DataUtil {
17 |
18 | /**
19 | * Returns the distinct lines of the given resource file.
20 | * @param name the file name
21 | * @return the result
22 | * @since 0.0.1
23 | */
24 | public static List distinctLines(final String name) {
25 | final String dir = "D:\\github\\sensitive-word\\src\\main\\resources\\"; // NOTE(review): hard-coded local Windows path
26 | final String path = dir + name;
27 | List lines = FileUtil.readAllLines(path);
28 | return CollectionUtil.distinct(lines);
29 | }
30 |
31 | public static List disctinctAndSort(final Collection collection) {
32 | List stringList = CollectionUtil.distinct(collection);
33 | Collections.sort(stringList); // sorts the de-duplicated list in place
34 |
35 | return stringList;
36 | }
37 |
38 | @Test
39 | @Ignore
40 | public void singleCharTest() {
41 | final String path = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; // NOTE(review): hard-coded local Windows path
42 |
43 | List stringList = FileUtil.readAllLines(path);
44 | for(String s : stringList) {
45 | if(s.length() == 1) { // print only the single-character entries
46 | System.out.println(s);
47 | }
48 | }
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.util.List;
7 |
8 | /**
9 | * project: sensitive-word-SensitiveWordBsTest
10 | * create on 2020/1/7 23:43
11 | *
12 | * @author Administrator
13 | * @since 0.0.12
14 | */
15 | public class SensitiveWordBsUrlTest {
16 |
17 | /**
18 | * Common URL detection and replacement (original note read "ignore Chinese traditional/simplified", which does not describe this test).
19 | * @since 0.0.12
20 | */
21 | @Test
22 | public void commonUrlTest() {
23 | final String text = "点击链接 https://www.baidu.com 查看答案";
24 |
25 | final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().enableUrlCheck(true).init();
26 | List wordList = sensitiveWordBs.findAll(text);
27 | Assert.assertEquals("[https://www.baidu.com]", wordList.toString());
28 |
29 | Assert.assertEquals("点击链接 ********************* 查看答案", sensitiveWordBs.replace(text));
30 | }
31 |
32 | /**
33 | * Image URL test.
34 | *
35 | * (1) It can be detected.
36 | * (2) Not replaced by default. NOTE(review): the assertion below expects the URL to be replaced with asterisks; confirm.
37 | *
38 | * @since 0.0.12
39 | */
40 | @Test
41 | public void imageUrlTest() {
42 | final String text = "双击查看大图 http://www.big-image.png 查看";
43 |
44 | final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
45 | .enableUrlCheck(true)
46 | .init();
47 | List wordList = sensitiveWordBs.findAll(text);
48 | Assert.assertEquals("[http://www.big-image.png]", wordList.toString());
49 |
50 | Assert.assertEquals("双击查看大图 ************************ 查看", sensitiveWordBs.replace(text));
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsIgnoreCharTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
4 | import org.junit.Assert;
5 | import org.junit.Test;
6 |
7 | import java.util.List;
8 |
9 | /**
10 | * project: sensitive-word-SensitiveWordBsTest
11 | * create on 2020/1/7 23:43
12 | *
13 | * @author Administrator
14 | * @since 0.11.0
15 | */
16 | public class SensitiveWordBsIgnoreCharTest {
17 |
18 | /**
19 | * Ignored-character test (original note read "ignore Chinese traditional/simplified", which does not describe this test).
20 | * @since 0.0.6
21 | */
22 | @Test
23 | public void ignoreChineseStyleTest() {
24 | final String text = "傻@冒,狗+东西";
25 |
26 | // By default the words are split by special characters and cannot be recognized
27 | List wordList = SensitiveWordBs.newInstance().init().findAll(text);
28 | Assert.assertEquals("[]", wordList.toString());
29 |
30 | // Specify the ignored-character strategy; a custom implementation can be supplied.
31 | List wordList2 = SensitiveWordBs.newInstance()
32 | .charIgnore(SensitiveWordCharIgnores.specialChars())
33 | .init()
34 | .findAll(text);
35 |
36 | Assert.assertEquals("[傻@冒, 狗+东西]", wordList2.toString());
37 | }
38 |
39 | //https://github.com/houbb/sensitive-word/issues/68
40 | @Test
41 | public void ignoreChineseStyleTest2() {
42 | final String text = "傻逼
";
43 |
44 | // Specify the ignored-character strategy; a custom implementation can be supplied.
45 | List wordList2 = SensitiveWordBs.newInstance()
46 | .charIgnore(SensitiveWordCharIgnores.specialChars())
47 | .init()
48 | .findAll(text);
49 |
50 | Assert.assertEquals("[傻逼]", wordList2.toString());
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditionWordTagsMatch.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.heaven.util.common.ArgUtil;
4 | import com.github.houbb.heaven.util.util.CollectionUtil;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 | import com.github.houbb.sensitive.word.api.IWordResult;
7 | import com.github.houbb.sensitive.word.api.IWordTag;
8 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
9 |
10 | import java.util.Collection;
11 | import java.util.Set;
12 |
13 | /**
14 | * Result condition that matches on word tags: a result matches when its word has at least one of the specified tags.
15 | *
16 | * @since 0.23.0
17 | */
18 | public class WordResultConditionWordTagsMatch extends AbstractWordResultCondition {
19 |
20 | /**
21 | * The collection of specified tags.
22 | */
23 | private final Collection tags;
24 |
25 | public WordResultConditionWordTagsMatch(Collection tags) {
26 | ArgUtil.notEmpty(tags, "tags");
27 |
28 | this.tags = tags;
29 | }
30 |
31 | @Override
32 | protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
33 | // Look up the tags of the matched word
34 | String word = text.substring(wordResult.startIndex(), wordResult.endIndex());
35 | final IWordTag wordTag = context.wordTag();
36 | Set wordTags = wordTag.getTag(word);
37 |
38 | // Check whether the word is in the specified tags; a word without tags never matches
39 | if(CollectionUtil.isEmpty(wordTags)) {
40 | return false;
41 | }
42 |
43 | for(String tag : tags) {
44 | if(wordTags.contains(tag)) {
45 | return true;
46 | }
47 | }
48 |
49 | return false;
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/data/NumUtilTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.data;
2 |
3 | import org.junit.Ignore;
4 | import org.junit.Test;
5 |
6 | import java.util.Arrays;
7 | import java.util.List;
8 |
9 | /** Prints grouped digit and letter glyph variants to standard output.
10 | * @author binbin.hou
11 | * @since 0.0.11
12 | */
13 | @Ignore
14 | public class NumUtilTest {
15 |
16 | @Test
17 | public void groupNumTest() {
18 | String nums = "123456789" +
19 | "一二三四五六七八九" +
20 | "壹贰叁肆伍陆柒捌玖" +
21 | "¹²³⁴⁵⁶⁷⁸⁹" +
22 | "₁₂₃₄₅₆₇₈₉" +
23 | "①②③④⑤⑥⑦⑧⑨" +
24 | "⑴⑵⑶⑷⑸⑹⑺⑻⑼" +
25 | "⒈⒉⒊⒋⒌⒍⒎⒏⒐" +
26 | "❶❷❸❹❺❻❼❽❾" +
27 | "➀➁➂➃➄➅➆➇➈" +
28 | "➊➋➌➍➎➏➐➑➒" +
29 | "㈠㈡㈢㈣㈤㈥㈦㈧㈨" +
30 | "⓵⓶⓷⓸⓹⓺⓻⓼⓽" +
31 | "㊀㊁㊂㊃㊄㊅㊆㊇㊈" +
32 | "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" +
33 | "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";
34 |
35 | for(int l = 0; l < 9; l++) { // one output row per position within a 9-character group
36 | for(int i = 0; i < 16; i++) { // one column per 9-character group above
37 | System.out.print(nums.charAt(i*9+l)+" ");
38 | }
39 | System.out.println();
40 | }
41 |
42 | }
43 |
44 |
45 | @Test
46 | public void groupEnglishTest() {
47 | List lines = Arrays.asList("ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ",
48 | "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ",
49 | "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵");
50 | for(int i = 0; i < 26; i++) { // one output row per letter position, taking the i-th glyph of each line
51 | System.out.print(lines.get(0).charAt(i)+" ");
52 | System.out.print(lines.get(1).charAt(i)+" ");
53 | System.out.print(lines.get(2).charAt(i));
54 | System.out.println();
55 | }
56 |
57 | }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckArray.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.util.common.ArgUtil;
4 | import com.github.houbb.sensitive.word.api.IWordCheck;
5 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
6 | import com.github.houbb.sensitive.word.support.result.WordLengthResult;
7 |
8 | import java.util.List;
9 |
10 | /**
11 | * Array-backed collection of checks: runs each check in order and returns the first result with a positive allow or deny length.
12 | * @author binbin.hou
13 | * @since 0.30.0
14 | */
15 | public class WordCheckArray implements IWordCheck {
16 |
17 | private final IWordCheck[] sensitiveChecks;
18 | private final int size;
19 | public WordCheckArray(List sensitiveChecks) {
20 | ArgUtil.notEmpty(sensitiveChecks, "sensitiveChecks");
21 |
22 | this.size = sensitiveChecks.size();
23 | this.sensitiveChecks = new IWordCheck[size]; // copy the list into an array once, at construction
24 | for(int i = 0; i < size; i++) {
25 | this.sensitiveChecks[i] = sensitiveChecks.get(i);
26 | }
27 | }
28 |
29 | @Override
30 | public WordCheckResult sensitiveCheck(int beginIndex, InnerSensitiveWordContext checkContext) {
31 | // Call each check in turn
32 | for(int i = 0; i < size; i++) {
33 | IWordCheck sensitiveCheck = sensitiveChecks[i];
34 | WordCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext);
35 |
36 | WordLengthResult wordLengthResult = result.wordLengthResult();
37 | if(wordLengthResult.wordAllowLen() > 0 || wordLengthResult.wordDenyLen()> 0) {
38 | return result;
39 | }
40 | }
41 |
42 | // (Original note: regex-related calls are made directly here.)
43 | // Return the "none" result by default
44 | return WordCheckNone.getNoneResult();
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.support.pipeline.Pipeline;
4 | import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
5 | import com.github.houbb.sensitive.word.api.IWordCheck;
6 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
7 | import com.github.houbb.sensitive.word.support.result.WordLengthResult;
8 |
9 | import java.util.List;
10 |
11 | /**
12 | * Initialization base class for checks: runs each check added by {@link #init(Pipeline)} and returns the first result with a positive allow or deny length.
13 | * @since 0.3.0
14 | */
15 | @Deprecated
16 | public abstract class WordCheckInit implements IWordCheck {
17 |
18 | /**
19 | * Initializes the list.
20 | *
21 | * @param pipeline the pipeline holding the current list
22 | * @since 0.0.13
23 | */
24 | protected abstract void init(final Pipeline pipeline);
25 |
26 |
27 | @Override
28 | public WordCheckResult sensitiveCheck(final int beginIndex,
29 | final InnerSensitiveWordContext checkContext) {
30 |
31 | Pipeline pipeline = new DefaultPipeline<>(); // a fresh pipeline is built on every call
32 | this.init(pipeline);
33 | List sensitiveChecks = pipeline.list();
34 |
35 | // Call each check in turn
36 | for(IWordCheck sensitiveCheck : sensitiveChecks) {
37 | WordCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext);
38 |
39 | WordLengthResult wordLengthResult = result.wordLengthResult();
40 | if(wordLengthResult.wordAllowLen() > 0 || wordLengthResult.wordDenyLen()> 0) {
41 | return result;
42 | }
43 | }
44 |
45 | // (Original note: regex-related calls are made directly here.)
46 | // Return the "none" result by default
47 | return WordCheckNone.getNoneResult();
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditionEnglishWordNumMatch.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.heaven.util.lang.CharUtil;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordResult;
6 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
7 |
8 | /**
9 | * English words and digits must be matched as whole words.
10 | *
11 | * https://github.com/houbb/sensitive-word/issues/77
12 | *
13 | * @since 0.20.0
14 | */
15 | public class WordResultConditionEnglishWordNumMatch extends AbstractWordResultCondition {
16 |
17 | @Override
18 | protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
19 | final int startIndex = wordResult.startIndex();
20 | final int endIndex = wordResult.endIndex();
21 | // Check the preceding character: if it is a digit or letter, the condition is not satisfied
22 | if(startIndex > 0) {
23 | char preC = text.charAt(startIndex-1);
24 | if(CharUtil.isDigitOrLetter(preC)) {
25 | return false;
26 | }
27 | }
28 |
29 | // Check the following character: if it is a digit or letter, the condition is not satisfied
30 | // v0.19.1: fixes the cp / cpm single-character false-hit problem
31 | if(endIndex < text.length()) {
32 | char afterC = text.charAt(endIndex);
33 | if(CharUtil.isDigitOrLetter(afterC)) {
34 | return false;
35 | }
36 | }
37 |
38 | // Check whether the matched span is an English word. NOTE(review): the loop returns true and so does the fall-through, so it does not change the result; confirm intent
39 | for(int i = startIndex; i < endIndex; i++) {
40 | char c = text.charAt(i);
41 | if(!CharUtil.isDigitOrLetter(c)) {
42 | return true;
43 | }
44 | }
45 |
46 | return true;
47 | }
48 |
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditionEnglishWordMatch.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.heaven.util.lang.CharUtil;
4 | import com.github.houbb.heaven.util.util.CharsetUtil;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 | import com.github.houbb.sensitive.word.api.IWordResult;
7 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
8 |
9 | /**
10 | * English words must be matched as whole words.
11 | *
12 | * https://github.com/houbb/sensitive-word/issues/45
13 | *
14 | * @since 0.13.0
15 | */
16 | public class WordResultConditionEnglishWordMatch extends AbstractWordResultCondition {
17 |
18 | @Override
19 | protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
20 | final int startIndex = wordResult.startIndex();
21 | final int endIndex = wordResult.endIndex();
22 | // Check the preceding character: if it is English, the condition is not satisfied
23 | if(startIndex > 0) {
24 | char preC = text.charAt(startIndex-1);
25 | if(CharUtil.isEnglish(preC)) {
26 | return false;
27 | }
28 | }
29 |
30 | // Check the following character: if it is English, the condition is not satisfied
31 | // v0.19.1: fixes the cp / cpm single-character false-hit problem
32 | if(endIndex < text.length()) {
33 | char afterC = text.charAt(endIndex);
34 | if(CharUtil.isEnglish(afterC)) {
35 | return false;
36 | }
37 | }
38 |
39 | // Check whether the matched span is an English word. NOTE(review): the loop returns true and so does the fall-through, so it does not change the result; confirm intent
40 | for(int i = startIndex; i < endIndex; i++) {
41 | char c = text.charAt(i);
42 | if(!CharUtil.isEnglish(c)) {
43 | return true;
44 | }
45 | }
46 |
47 | return true;
48 | }
49 |
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/AbstractWordAllowDenyCombine.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.combine.allowdeny;
2 |
3 | import com.github.houbb.heaven.util.util.CollectionUtil;
4 | import com.github.houbb.sensitive.word.api.IWordAllow;
5 | import com.github.houbb.sensitive.word.api.IWordContext;
6 | import com.github.houbb.sensitive.word.api.IWordDeny;
7 | import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine;
8 | import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
9 |
10 | import java.util.Collection;
11 | import java.util.Collections;
12 | import java.util.List;
13 |
14 | /** Base class that formats the allow and deny lists and handles the empty cases before delegating to the subclass.
15 | * @author d
16 | * @since 0.8.0
17 | */
18 | public abstract class AbstractWordAllowDenyCombine implements IWordAllowDenyCombine {
19 |
20 | protected abstract Collection doGetActualDenyList(List allowList,
21 | List denyList,
22 | IWordContext context);
23 |
24 | @Override
25 | public Collection getActualDenyList(final List allowList,
26 | final List denyList,
27 | IWordContext context) {
28 | List formatAllowList = InnerWordFormatUtils.formatWordList(allowList, context);
29 | List formatDenyList = InnerWordFormatUtils.formatWordList(denyList, context);
30 |
31 | if (CollectionUtil.isEmpty(formatDenyList)) { // nothing to deny
32 | return Collections.emptyList();
33 | }
34 | if (CollectionUtil.isEmpty(formatAllowList)) { // nothing allowed: the formatted deny list is returned unchanged
35 | return formatDenyList;
36 | }
37 |
38 | return doGetActualDenyList(formatAllowList, formatDenyList, context); // the subclass receives the formatted lists
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.data;
2 |
3 | import com.github.houbb.sensitive.word.api.ISensitiveWordDestroy;
4 |
5 | import java.util.HashMap;
6 | import java.util.Map;
7 |
8 | /**
9 | * Tree node.
10 | *
11 | * @since 0.7.0
12 | */
13 | public class WordDataTreeNode implements ISensitiveWordDestroy {
14 |
15 | /**
16 | * Flag marking the end of a keyword.
17 | */
18 | private boolean end;
19 |
20 | /**
21 | * Child nodes (the key is the next-level character, the value is the next-level node); created lazily, so it may be null.
22 | */
23 | private Map subNodeMap;
24 |
25 | public boolean end() {
26 | return end;
27 | }
28 |
29 | public WordDataTreeNode end(boolean end) {
30 | this.end = end;
31 | return this;
32 | }
33 |
34 | public WordDataTreeNode getSubNode(final Character c) {
35 | if(subNodeMap == null) { // no children yet
36 | return null;
37 | }
38 |
39 | return subNodeMap.get(c);
40 | }
41 | public int getNodeSize() {
42 | if (subNodeMap == null) { // no children yet
43 | return 0;
44 | }
45 | return subNodeMap.size();
46 | }
47 |
48 | public void clearNode() {
49 | if (subNodeMap == null) {
50 | return;
51 | }
52 | subNodeMap=null; // drops the map reference rather than clearing the map
53 | }
54 |
55 | public void removeNode(final Character c) {
56 | if (subNodeMap == null) {
57 | return;
58 | }
59 | subNodeMap.remove(c);
60 | }
61 |
62 | public WordDataTreeNode addSubNode(Character c, WordDataTreeNode subNode) {
63 | if(this.subNodeMap == null) { // create the child map on first use
64 | subNodeMap = new HashMap<>();
65 | }
66 |
67 | subNodeMap.put(c, subNode);
68 | return this;
69 | }
70 |
71 | @Override
72 | public void destroy() {
73 | if(subNodeMap != null) {
74 | subNodeMap.clear(); // clears this node's direct child map only
75 | }
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.result;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordResult;
4 |
5 | /** Mutable {@link IWordResult} implementation with fluent setters.
6 | * @author binbin.hou
7 | * @since 0.1.0
8 | */
9 | public class WordResult implements IWordResult {
10 |
11 | private int startIndex;
12 |
13 | private int endIndex;
14 |
15 | /**
16 | * Word category.
17 | * @since 0.14.0
18 | */
19 | private String type;
20 |
21 | /**
22 | * The matched word.
23 | * @since 0.25.0
24 | */
25 | private String word;
26 |
27 | private WordResult(){}
28 |
29 | public static WordResult newInstance() {
30 | return new WordResult();
31 | }
32 |
33 | @Override
34 | public int startIndex() {
35 | return startIndex;
36 | }
37 |
38 | public WordResult startIndex(int startIndex) {
39 | this.startIndex = startIndex;
40 | return this;
41 | }
42 |
43 | @Override
44 | public int endIndex() {
45 | return endIndex;
46 | }
47 |
48 | public WordResult endIndex(int endIndex) {
49 | this.endIndex = endIndex;
50 | return this;
51 | }
52 |
53 | @Override
54 | public String type() {
55 | return type;
56 | }
57 |
58 | public WordResult type(String type) {
59 | this.type = type;
60 | return this;
61 | }
62 |
63 | @Override
64 | public String word() {
65 | return word;
66 | }
67 |
68 | public WordResult word(String word) {
69 | this.word = word;
70 | return this;
71 | }
72 |
73 | @Override
74 | public String toString() {
75 | return "WordResult{" +
76 | "startIndex=" + startIndex +
77 | ", endIndex=" + endIndex +
78 | ", type='" + type + '\'' +
79 | ", word='" + word + '\'' +
80 | '}';
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordCheck;
4 | import com.github.houbb.sensitive.word.support.result.WordLengthResult;
5 |
6 | /**
7 | * 敏感信息监测接口结果
8 | *
9 | * 可以使用责任链的模式,循环调用。
10 | * @author binbin.hou
11 | * @since 0.0.12
12 | */
13 | public class WordCheckResult {
14 |
15 | /**
16 | * 命中的黑白名单的长度对象
17 | */
18 | private WordLengthResult wordLengthResult;
19 |
20 | /**
21 | * 检测类
22 | * @since 0.0.12
23 | */
24 | private Class extends IWordCheck> checkClass;
25 |
26 | /**
27 | * 单词类别
28 | * @since 0.14.0
29 | */
30 | private String type;
31 |
32 | private WordCheckResult(){}
33 |
34 | public static WordCheckResult newInstance() {
35 | return new WordCheckResult();
36 | }
37 |
38 | public WordLengthResult wordLengthResult() {
39 | return wordLengthResult;
40 | }
41 |
42 | public WordCheckResult wordLengthResult(WordLengthResult wordLengthResult) {
43 | this.wordLengthResult = wordLengthResult;
44 | return this;
45 | }
46 |
47 | public Class extends IWordCheck> checkClass() {
48 | return checkClass;
49 | }
50 |
51 | public WordCheckResult checkClass(Class extends IWordCheck> checkClass) {
52 | this.checkClass = checkClass;
53 | return this;
54 | }
55 |
56 | public String type() {
57 | return type;
58 | }
59 |
60 | public WordCheckResult type(String type) {
61 | this.type = type;
62 | return this;
63 | }
64 |
65 | @Override
66 | public String toString() {
67 | return "WordCheckResult{" +
68 | "wordLengthResult=" + wordLengthResult +
69 | ", checkClass=" + checkClass +
70 | ", type='" + type + '\'' +
71 | '}';
72 | }
73 |
74 | }
75 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveWordContext.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.api.context;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordContext;
4 | import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
5 |
6 | import java.util.Map;
7 |
8 | /**
9 | * 内部信息上下文
10 | *
11 | * @author binbin.hou
12 | * @since 0.6.0
13 | */
14 | public class InnerSensitiveWordContext {
15 |
16 | /**
17 | * 原始文本
18 | */
19 | private String originalText;
20 | /**
21 | * 格式化后的字符
22 | */
23 | private Map formatCharMapping;
24 | /**
25 | * 校验模式
26 | */
27 | private WordValidModeEnum modeEnum;
28 | /**
29 | * 原始上下文
30 | */
31 | private IWordContext wordContext;
32 |
33 | public static InnerSensitiveWordContext newInstance() {
34 | return new InnerSensitiveWordContext();
35 | }
36 |
37 | public String originalText() {
38 | return originalText;
39 | }
40 |
41 | public InnerSensitiveWordContext originalText(String text) {
42 | this.originalText = text;
43 | return this;
44 | }
45 |
46 | public Map formatCharMapping() {
47 | return formatCharMapping;
48 | }
49 |
50 | public InnerSensitiveWordContext formatCharMapping(Map formatCharMapping) {
51 | this.formatCharMapping = formatCharMapping;
52 | return this;
53 | }
54 |
55 | public WordValidModeEnum modeEnum() {
56 | return modeEnum;
57 | }
58 |
59 | public InnerSensitiveWordContext modeEnum(WordValidModeEnum modeEnum) {
60 | this.modeEnum = modeEnum;
61 | return this;
62 | }
63 |
64 | public IWordContext wordContext() {
65 | return wordContext;
66 | }
67 |
68 | public InnerSensitiveWordContext wordContext(IWordContext context) {
69 | this.wordContext = context;
70 | return this;
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTagLines.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.tag;
2 |
3 | import com.github.houbb.heaven.util.common.ArgUtil;
4 | import com.github.houbb.heaven.util.lang.StringUtil;
5 | import com.github.houbb.sensitive.word.api.IWordTag;
6 |
7 | import java.util.*;
8 |
9 | /**
10 | * 根据标准的行来处理
11 | *
12 | * 行规范:
13 | *
14 | * 单词 标签1,标签2
15 | *
16 | * @since 0.24.0
17 | */
18 | public class WordTagLines extends AbstractWordTag {
19 |
20 | private final IWordTag wordTag;
21 |
22 | /**
23 | * 词和标签的分隔符
24 | */
25 | private final String wordSplit;
26 | /**
27 | * 标签的分隔符
28 | */
29 | private final String tagSplit;
30 |
31 | public WordTagLines(Collection lines,
32 | final String wordSplit,
33 | final String tagSplit) {
34 | ArgUtil.notNull(lines, "lines");
35 | ArgUtil.notEmpty(wordSplit, "wordSplit");
36 | ArgUtil.notEmpty(tagSplit, "tagSplit");
37 |
38 | this.wordSplit = wordSplit;
39 | this.tagSplit = tagSplit;
40 |
41 | Map> wordTagMap = buildWordTagMap(lines);
42 | wordTag = WordTags.map(wordTagMap);
43 | }
44 |
45 | public WordTagLines(Collection lines) {
46 | this(lines, " ", ",");
47 | }
48 |
49 | private Map> buildWordTagMap(final Collection lines) {
50 | Map> wordTagMap = new HashMap<>();
51 |
52 | for(String line : lines) {
53 | String[] strings = line.split(wordSplit);
54 | String key = strings[0];
55 | Set tags = new HashSet<>(StringUtil.splitToList(strings[1], tagSplit));
56 | wordTagMap.put(key, tags);
57 | }
58 | return wordTagMap;
59 | }
60 |
61 | @Override
62 | protected Set doGetTag(String word) {
63 | return wordTag.getTag(word);
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/support/resultcondition/WordTagsTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordDeny;
4 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
5 | import com.github.houbb.sensitive.word.support.allow.WordAllows;
6 | import org.junit.Assert;
7 | import org.junit.Test;
8 |
9 | import java.util.Arrays;
10 | import java.util.List;
11 |
12 | public class WordTagsTest {
13 |
14 |
15 | /**
16 | * 是否包含
17 | *
18 | * @since 0.23.0
19 | */
20 | @Test
21 | public void wordTagsTest() {
22 | // 只关心SE情
23 | SensitiveWordBs sensitiveWordBsYellow = SensitiveWordBs.newInstance()
24 | .wordDeny(new IWordDeny() {
25 | @Override
26 | public List deny() {
27 | return Arrays.asList("商品", "AV");
28 | }
29 | })
30 | .wordAllow(WordAllows.empty())
31 | .wordTag(new MyWordTag())
32 | .wordResultCondition(WordResultConditions.wordTags(Arrays.asList("色情")))
33 | .init();
34 |
35 | // 只关心广告
36 | SensitiveWordBs sensitiveWordBsAd = SensitiveWordBs.newInstance()
37 | .wordDeny(new IWordDeny() {
38 | @Override
39 | public List deny() {
40 | return Arrays.asList("商品", "AV");
41 | }
42 | })
43 | .wordAllow(WordAllows.empty())
44 | .wordTag(new MyWordTag())
45 | .wordResultCondition(WordResultConditions.wordTags(Arrays.asList("广告")))
46 | .init();
47 |
48 | final String text = "这些 AV 商品什么价格?";
49 | Assert.assertEquals("[AV]", sensitiveWordBsYellow.findAll(text).toString());
50 | Assert.assertEquals("[商品]", sensitiveWordBsAd.findAll(text).toString());
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.result;
2 |
/**
 * Holds the allow-list and deny-list match information side by side.
 * Original note: lets both lists be handled in a single traversal (performance optimisation).
 *
 * @since 0.24.2
 */
public class WordLengthResult {

    /**
     * Allow-list match length.
     */
    private int wordAllowLen;

    /**
     * Deny-list match length.
     */
    private int wordDenyLen;

    /**
     * Word matched on the deny list.
     * @since 0.25.1
     */
    private String wordDeny;

    /**
     * Actual value matched on the allow list.
     * @since 0.25.1
     */
    private String wordAllow;

    /**
     * @return a fresh result: both lengths {@code 0}, both words {@code null}
     */
    public static WordLengthResult newInstance() {
        return new WordLengthResult();
    }

    /**
     * @return the allow-list match length
     */
    public int wordAllowLen() {
        return wordAllowLen;
    }

    /**
     * @param wordAllowLen allow-list match length to store
     * @return this result, for chaining
     */
    public WordLengthResult wordAllowLen(int wordAllowLen) {
        this.wordAllowLen = wordAllowLen;
        return this;
    }

    /**
     * @return the deny-list match length
     */
    public int wordDenyLen() {
        return wordDenyLen;
    }

    /**
     * @param wordDenyLen deny-list match length to store
     * @return this result, for chaining
     */
    public WordLengthResult wordDenyLen(int wordDenyLen) {
        this.wordDenyLen = wordDenyLen;
        return this;
    }

    /**
     * @return the deny-list word
     */
    public String wordDeny() {
        return this.wordDeny;
    }

    /**
     * @param wordDeny deny-list word to store
     * @return this result, for chaining
     */
    public WordLengthResult wordDeny(String wordDeny) {
        this.wordDeny = wordDeny;
        return this;
    }

    /**
     * @return the allow-list word
     */
    public String wordAllow() {
        return this.wordAllow;
    }

    /**
     * @param wordAllow allow-list word to store
     * @return this result, for chaining
     */
    public WordLengthResult wordAllow(String wordAllow) {
        this.wordAllow = wordAllow;
        return this;
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("WordLengthResult{");
        text.append("wordAllowLen=").append(wordAllowLen);
        text.append(", wordDenyLen=").append(wordDenyLen);
        text.append(", wordDeny='").append(wordDeny).append('\'');
        text.append(", wordAllow='").append(wordAllow).append('\'');
        text.append('}');
        return text.toString();
    }

}
81 |
--------------------------------------------------------------------------------
/release.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Release helper: sets the release version, pushes, deploys with the "release"
# Maven profile, merges the release branch into master, then opens the next
# release branch with a new SNAPSHOT version.

# Stop at the first failing command so a broken step cannot be followed by a
# deploy, a merge, or a misleading "DONE" message.
set -e

echo "============================= RELEASE START..."

## Version information (must be set by hand before each run)
# NOTE(review): confirm projectName matches the artifactId in pom.xml.
version="0.0.1"
newVersion="0.0.2"
projectName="sisyphus"

# Release the project version
## snapshot version
snapshot_version="${version}-SNAPSHOT"
## new (release) version
release_version="${version}"

mvn versions:set -DgroupId=com.github.houbb -DartifactId="${projectName}" -DoldVersion="${snapshot_version}" -DnewVersion="${release_version}"
mvn -N versions:update-child-modules
mvn versions:commit
echo "1. RELEASE ${snapshot_version} TO ${release_version} DONE."


# Push to GitHub
git add .
git commit -m "release branch ${version}"
git push
git status

echo "2. PUSH TO GITHUB DONE."


# Deploy to the Maven central repository
mvn clean deploy -P release

echo "3. PUSH TO MAVEN CENTER DONE."

# Merge into the master branch
branchName="release_${version}" # branch name
git checkout master
git pull
git checkout "${branchName}"
git rebase master
git checkout master
git merge "${branchName}"
git push

echo "4. MERGE TO MASTER DONE."


# Create the new branch
newBranchName="release_${newVersion}"
git branch "${newBranchName}"
git checkout "${newBranchName}"
git push --set-upstream origin "${newBranchName}"

echo "5. NEW BRANCH DONE."

# Change the version on the new branch
## snapshot version
snapshot_new_version="${newVersion}-SNAPSHOT"
mvn versions:set -DgroupId=com.github.houbb -DartifactId="${projectName}" -DoldVersion="${release_version}" -DnewVersion="${snapshot_new_version}"
mvn -N versions:update-child-modules
mvn versions:commit

git add .
git commit -m "modify branch ${release_version} TO ${snapshot_new_version}"
git push
git status
echo "6. MODIFY ${release_version} TO ${snapshot_new_version} DONE."

echo "============================= RELEASE END..."


# Usage:
# 1. Grant execute permission: chmod +x ./release.sh
# 2. Run: ./release.sh
# Last Update Time: 2018-01-20 13:17:06
# Author: houbb
77 |
78 |
79 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyleC2C.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordFormat;
6 | import com.github.houbb.sensitive.word.collection.Char2CharMap;
7 |
8 | /**
9 | * 忽略英文的各种格式
10 | * @author binbin.hou
11 | * @since 0.0.6
12 | */
13 | @ThreadSafe
14 | public class WordFormatIgnoreEnglishStyleC2C implements IWordFormat {
15 |
16 | private static final IWordFormat INSTANCE = new WordFormatIgnoreEnglishStyleC2C();
17 |
18 | public static IWordFormat getInstance() {
19 | return INSTANCE;
20 | }
21 |
22 | /**
23 | * 英文字母1
24 | * @since 0.0.4
25 | */
26 | private static final String LETTERS_ONE =
27 | "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" +
28 | "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ" +
29 | "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵";
30 |
31 | /**
32 | * 英文字母2
33 | * @since 0.0.4
34 | */
35 | private static final String LETTERS_TWO =
36 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
37 | "abcdefghijklmnopqrstuvwxyz" +
38 | "abcdefghijklmnopqrstuvwxyz";
39 |
40 |
41 | /**
42 | * 字母映射表
43 | */
44 | private static final Char2CharMap LETTER_MAP = new Char2CharMap(LETTERS_ONE.length());
45 |
46 | static {
47 | final int size = LETTERS_ONE.length();
48 | for(int i = 0; i < size; i++) {
49 | LETTER_MAP.put(LETTERS_ONE.charAt(i), LETTERS_TWO.charAt(i));
50 | }
51 | }
52 |
53 | /**
54 | * 映射后的 char
55 | * @param c 待转换的 char
56 | * @return 转换结果
57 | * @since 0.29.x
58 | */
59 | private char getMappingChar(final char c) {
60 | char mc = LETTER_MAP.get(c);
61 | return mc == 0 ? c : mc;
62 | }
63 |
64 | @Override
65 | public char format(char original, IWordContext context) {
66 | return getMappingChar(original);
67 | }
68 |
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordFormat;
6 |
7 | import java.util.HashMap;
8 | import java.util.Map;
9 |
10 | /**
11 | * 忽略英文的各种格式
12 | * @author binbin.hou
13 | * @since 0.0.6
14 | */
15 | @Deprecated
16 | @ThreadSafe
17 | public class WordFormatIgnoreEnglishStyle implements IWordFormat {
18 |
19 | private static final IWordFormat INSTANCE = new WordFormatIgnoreEnglishStyle();
20 |
21 | public static IWordFormat getInstance() {
22 | return INSTANCE;
23 | }
24 |
25 | /**
26 | * 英文字母1
27 | * @since 0.0.4
28 | */
29 | private static final String LETTERS_ONE =
30 | "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" +
31 | "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ" +
32 | "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵";
33 |
34 | /**
35 | * 英文字母2
36 | * @since 0.0.4
37 | */
38 | private static final String LETTERS_TWO =
39 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
40 | "abcdefghijklmnopqrstuvwxyz" +
41 | "abcdefghijklmnopqrstuvwxyz";
42 |
43 |
44 | /**
45 | * 字母映射表
46 | */
47 | private static final Map LETTER_MAP = new HashMap<>(LETTERS_ONE.length());
48 |
49 | static {
50 | final int size = LETTERS_ONE.length();
51 | for(int i = 0; i < size; i++) {
52 | LETTER_MAP.put(LETTERS_ONE.charAt(i), LETTERS_TWO.charAt(i));
53 | }
54 | }
55 |
56 | /**
57 | * 映射后的 char
58 | * @param c 待转换的 char
59 | * @return 转换结果
60 | * @since 0.29.x
61 | */
62 | private char getMappingChar(final char c) {
63 | Character mapChar = LETTER_MAP.get(c);
64 | return mapChar == null ? c : mapChar;
65 | }
66 |
67 |
68 | @Override
69 | public char format(char original, IWordContext context) {
70 | return getMappingChar(original);
71 | }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.util.util.ArrayUtil;
4 | import com.github.houbb.heaven.util.util.CollectionUtil;
5 | import com.github.houbb.sensitive.word.api.IWordFormat;
6 |
7 | import java.util.ArrayList;
8 | import java.util.List;
9 |
10 | /**
11 | * 格式化工具类
12 | * @author binbin.hou
13 | * @since 0.3.5
14 | */
15 | public final class WordFormats {
16 |
17 | private WordFormats(){}
18 |
19 | /**
20 | * 链式
21 | * @param charFormats 列表
22 | * @return 结果
23 | */
24 | public static IWordFormat chains(final IWordFormat... charFormats) {
25 | if(ArrayUtil.isEmpty(charFormats)) {
26 | return none();
27 | }
28 |
29 | List wordFormats = new ArrayList<>(charFormats.length);
30 | return array(wordFormats);
31 | }
32 |
33 | /**
34 | * 链式
35 | * @param charFormats 列表
36 | * @return 结果
37 | */
38 | public static IWordFormat chains(final List charFormats) {
39 | if(CollectionUtil.isEmpty(charFormats)) {
40 | return none();
41 | }
42 |
43 | return array(charFormats);
44 | }
45 |
46 | public static IWordFormat none() {
47 | return WordFormatNone.getInstance();
48 | }
49 | public static IWordFormat ignoreCase() {
50 | return WordFormatIgnoreCase.getInstance();
51 | }
52 |
53 | public static IWordFormat ignoreEnglishStyle() {
54 | return WordFormatIgnoreEnglishStyleC2C.getInstance();
55 | }
56 |
57 | public static IWordFormat ignoreChineseStyle() {
58 | return WordFormatIgnoreChineseStyle.getInstance();
59 | }
60 |
61 | public static IWordFormat ignoreNumStyle() {
62 | return WordFormatIgnoreNumStyleC2C.getInstance();
63 | }
64 |
65 | public static IWordFormat ignoreWidth() {
66 | return WordFormatIgnoreWidth.getInstance();
67 | }
68 |
69 | public static IWordFormat array(final List wordFormats) {
70 | return new WordFormatArray(wordFormats);
71 | }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckEmail.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.util.lang.CharUtil;
5 | import com.github.houbb.heaven.util.util.regex.RegexUtil;
6 | import com.github.houbb.sensitive.word.api.IWordCheck;
7 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
8 | import com.github.houbb.sensitive.word.constant.WordConst;
9 | import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
10 |
11 | /**
12 | * email 正则表达式检测实现。
13 | *
14 | * TODO: 这里暂时不实现邮箱后缀的实现。
15 | *
16 | * (1)命中结果应该有标记,属于哪一个验证模式命中
17 | * (2)后期优化方案可以是:
18 | * 如果数字后面紧跟的是邮箱后缀命中,则直接连接起来 num+email-suffix;
19 | * (3)邮箱后缀的去重
20 | * 邮箱后缀可以只处理为和 Num 构建,如果没有直接丢弃的模式。
21 | *
22 | * 也可以严格的保留下来。
23 | * @author binbin.hou
24 | * @since 0.0.9
25 | */
26 | @ThreadSafe
27 | public class WordCheckEmail extends AbstractConditionWordCheck {
28 |
29 | /**
30 | * @since 0.3.0
31 | */
32 | private static final IWordCheck INSTANCE = new WordCheckEmail();
33 |
34 | public static IWordCheck getInstance() {
35 | return INSTANCE;
36 | }
37 |
38 | @Override
39 | protected Class extends IWordCheck> getSensitiveCheckClass() {
40 | return WordCheckEmail.class;
41 | }
42 |
43 | @Override
44 | protected String getType() {
45 | return WordTypeEnum.EMAIL.getCode();
46 | }
47 |
48 | @Override
49 | protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
50 | return CharUtil.isEmilChar(mappingChar);
51 | }
52 |
53 | @Override
54 | protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) {
55 | int bufferLen = stringBuilder.length();
56 |
57 | //x@a.cn
58 | if(bufferLen < 6) {
59 | return false;
60 | }
61 | if(bufferLen > WordConst.MAX_EMAIL_LEN) {
62 | return false;
63 | }
64 |
65 | String string = stringBuilder.toString();
66 | return RegexUtil.isEmail(string);
67 | }
68 |
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.util.lang.StringUtil;
5 | import com.github.houbb.sensitive.word.api.IWordCheck;
6 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
7 | import com.github.houbb.sensitive.word.support.result.WordLengthResult;
8 |
9 | /**
10 | * 抽象实现策略
11 | *
12 | * @author binbin.hou
13 | * @since 0.4.0
14 | */
15 | @ThreadSafe
16 | public abstract class AbstractWordCheck implements IWordCheck {
17 |
18 | /**
19 | * 获取校验类
20 | * @return 类
21 | * @since 0.3.2
22 | */
23 | protected abstract Class extends IWordCheck> getSensitiveCheckClass();
24 |
25 | /**
26 | * 获取确切的长度
27 | * @param beginIndex 开始
28 | * @param checkContext 上下文
29 | * @return 长度
30 | * @since 0.4.0
31 | */
32 | protected abstract WordLengthResult getActualLength(int beginIndex, final InnerSensitiveWordContext checkContext);
33 |
34 | /**
35 | * 获取类别
36 | * @return 类别
37 | * @since 0.14.0
38 | */
39 | protected abstract String getType();
40 |
41 | @Override
42 | public WordCheckResult sensitiveCheck(int beginIndex,
43 | final InnerSensitiveWordContext checkContext) {
44 | Class extends IWordCheck> clazz = getSensitiveCheckClass();
45 | final String txt = checkContext.originalText();
46 | WordLengthResult wordLengthResult = WordLengthResult.newInstance()
47 | .wordAllowLen(0)
48 | .wordDenyLen(0);
49 |
50 | if(StringUtil.isEmpty(txt)) {
51 | return WordCheckResult.newInstance()
52 | .wordLengthResult(wordLengthResult)
53 | .type(getType())
54 | .checkClass(clazz);
55 | }
56 |
57 | wordLengthResult = getActualLength(beginIndex, checkContext);
58 |
59 | return WordCheckResult.newInstance()
60 | .wordLengthResult(wordLengthResult)
61 | .type(getType())
62 | .checkClass(clazz)
63 | ;
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.util.util.ArrayUtil;
4 | import com.github.houbb.heaven.util.util.CollectionUtil;
5 | import com.github.houbb.sensitive.word.api.IWordCheck;
6 |
7 | import java.util.ArrayList;
8 | import java.util.List;
9 |
10 | /**
11 | * 敏感词检测工具
12 | * @since 0.3.0
13 | */
14 | public final class WordChecks {
15 |
16 | private WordChecks(){}
17 |
18 | public static IWordCheck chains(final IWordCheck... sensitiveChecks) {
19 | if (ArrayUtil.isEmpty(sensitiveChecks)){
20 | return none();
21 | }
22 |
23 | List wordChecks = new ArrayList<>(sensitiveChecks.length);
24 | return array(wordChecks);
25 | }
26 |
27 | public static IWordCheck chains(final List sensitiveChecks) {
28 | if (CollectionUtil.isEmpty(sensitiveChecks)){
29 | return none();
30 | }
31 |
32 | return array(sensitiveChecks);
33 | }
34 |
35 | public static IWordCheck email() {
36 | return WordCheckEmail.getInstance();
37 | }
38 |
39 | public static IWordCheck num() {
40 | return WordCheckNum.getInstance();
41 | }
42 |
43 | public static IWordCheck url() {
44 | return WordCheckUrl.getInstance();
45 | }
46 |
47 | public static IWordCheck word() {
48 | return WordCheckWord.getInstance();
49 | }
50 |
51 | public static IWordCheck none() {
52 | return WordCheckNone.getInstance();
53 | }
54 |
55 | /**
56 | * ipv4 校验
57 | * @since 0.17.0
58 | * @return 实现
59 | */
60 | public static IWordCheck ipv4() {
61 | return WordCheckIPV4.getInstance();
62 | }
63 |
64 | /**
65 | * 不需要前缀的 urlPrefix
66 | * 注意:这种检测方法可能会和代码中的包名称冲突
67 | *
68 | * @return 实现
69 | * @since 0.25.0
70 | */
71 | public static IWordCheck urlNoPrefix() {
72 | return WordCheckUrlNoPrefix.getInstance();
73 | }
74 |
75 | /**
76 | * 集合
77 | *
78 | * @return 实现
79 | * @since 0.30.0
80 | */
81 | public static IWordCheck array(final List wordChecks) {
82 | return new WordCheckArray(wordChecks);
83 | }
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.data;
2 |
3 | import com.github.houbb.heaven.util.util.CollectionUtil;
4 | import com.github.houbb.sensitive.word.api.IWordData;
5 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
6 | import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
7 |
8 | import java.util.Collection;
9 |
10 | /**
11 | * 抽象数据
12 | *
13 | * @since 0.7.0
14 | */
15 | public abstract class AbstractWordData implements IWordData {
16 |
17 | /**
18 | * 是否包含
19 | * @param stringBuilder 字符
20 | * @param innerContext 上下文
21 | * @return 结果
22 | */
23 | protected abstract WordContainsTypeEnum doContains(StringBuilder stringBuilder, InnerSensitiveWordContext innerContext);
24 |
25 | /**
26 | * 初始化
27 | * @param collection 数据
28 | */
29 | protected abstract void doInitWordData(Collection collection);
30 |
31 | /**
32 | * 删除敏感词
33 | * @param collection 集合
34 | */
35 | protected abstract void doRemoveWord(Collection collection);
36 |
37 | /**
38 | * 新增敏感词
39 | * @param collection 敏感词
40 | */
41 | protected abstract void doAddWord(Collection collection);
42 |
43 | @Override
44 | public void initWordData(Collection collection) {
45 | //1. 预留
46 |
47 | this.doInitWordData(collection);
48 | }
49 |
50 | @Override
51 | public void removeWord(Collection collection) {
52 | if(CollectionUtil.isEmpty(collection)) {
53 | return;
54 | }
55 |
56 | doRemoveWord(collection);
57 | }
58 |
59 | @Override
60 | public void addWord(Collection collection) {
61 | if(CollectionUtil.isEmpty(collection)) {
62 | return;
63 | }
64 |
65 | doAddWord(collection);
66 | }
67 |
68 | @Override
69 | public WordContainsTypeEnum contains(StringBuilder stringBuilder, InnerSensitiveWordContext innerContext) {
70 | if(stringBuilder == null
71 | || stringBuilder.length() <= 0) {
72 | return WordContainsTypeEnum.NOT_FOUND;
73 | }
74 |
75 | return doContains(stringBuilder, innerContext);
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditions.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.resultcondition;
2 |
3 | import com.github.houbb.heaven.support.pipeline.Pipeline;
4 | import com.github.houbb.heaven.util.common.ArgUtil;
5 | import com.github.houbb.heaven.util.util.ArrayUtil;
6 | import com.github.houbb.sensitive.word.api.IWordResultCondition;
7 |
8 | import java.util.List;
9 |
10 | /**
11 | * 匹配结果工具类
12 | *
13 | * @since 0.13.0
14 | */
15 | public final class WordResultConditions {
16 |
17 | /**
18 | * 恒为真
19 | * @return 结果
20 | */
21 | public static IWordResultCondition alwaysTrue() {
22 | return new WordResultConditionAlwaysTrue();
23 | }
24 |
25 | /**
26 | * 如果是英文,则必须全词匹匹配
27 | * @return 结果
28 | * @since 0.13.0
29 | */
30 | public static IWordResultCondition englishWordMatch() {
31 | return new WordResultConditionEnglishWordMatch();
32 | }
33 |
34 | /**
35 | * 如果是英文或者数字,则必须全词匹匹配
36 | * @return 结果
37 | * @since 0.20.0
38 | */
39 | public static IWordResultCondition englishWordNumMatch() {
40 | return new WordResultConditionEnglishWordNumMatch();
41 | }
42 |
43 | /**
44 | * 单词标签
45 | * @param tags 标签列表
46 | * @return 结果
47 | * @since 0.23.0
48 | */
49 | public static IWordResultCondition wordTags(List tags) {
50 | ArgUtil.notEmpty(tags, "tags");
51 |
52 | return new WordResultConditionWordTagsMatch(tags);
53 | }
54 |
55 | /**
56 | * 链式调用,支持同时满足多个条件
57 | *
58 | * @since 0.23.0
59 | * @param condition 条件
60 | * @param others 其他条件
61 | * @return 结果
62 | */
63 | public static IWordResultCondition chains(final IWordResultCondition condition, final IWordResultCondition ... others) {
64 | return new WordResultConditionInit() {
65 | @Override
66 | protected void init(Pipeline pipeline) {
67 | pipeline.addLast(condition);
68 | if(ArrayUtil.isNotEmpty(others)) {
69 | for(IWordResultCondition other : others) {
70 | pipeline.addLast(other);
71 | }
72 | }
73 | }
74 | };
75 | }
76 |
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.util.lang.CharUtil;
5 | import com.github.houbb.heaven.util.util.regex.RegexUtil;
6 | import com.github.houbb.sensitive.word.api.IWordCheck;
7 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
8 | import com.github.houbb.sensitive.word.constant.WordConst;
9 | import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
10 |
11 | /**
12 | * URL 正则表达式检测实现。
13 | *
14 | * 也可以严格的保留下来。
15 | *
16 | * (1)暂时先粗略的处理 web-site
17 | * (2)如果网址的最后为图片类型,则跳过。
18 | * (3)长度超过 70,直接结束。
19 | *
20 | * @author binbin.hou
21 | * @since 0.0.9
22 | */
23 | @ThreadSafe
24 | public class WordCheckUrl extends AbstractConditionWordCheck {
25 |
26 | /**
27 | * @since 0.3.0
28 | */
29 | private static final IWordCheck INSTANCE = new WordCheckUrl();
30 |
31 | public static IWordCheck getInstance() {
32 | return INSTANCE;
33 | }
34 |
35 | @Override
36 | protected Class extends IWordCheck> getSensitiveCheckClass() {
37 | return WordCheckUrl.class;
38 | }
39 |
40 | @Override
41 | protected String getType() {
42 | return WordTypeEnum.URL.getCode();
43 | }
44 |
45 | @Override
46 | protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
47 | return CharUtil.isWebSiteChar(mappingChar) || mappingChar == ':' || mappingChar == '/';
48 | }
49 |
50 | @Override
51 | protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) {
52 | int bufferLen = stringBuilder.length();
53 | //a.cn
54 | if(bufferLen < 4) {
55 | return false;
56 | }
57 | if(bufferLen > WordConst.MAX_WEB_SITE_LEN) {
58 | return false;
59 | }
60 |
61 | // 改为 http:// 或者 https:// 开头
62 | String string = stringBuilder.toString();
63 | return isUrl(string);
64 | }
65 |
66 | /**
67 | * 是否为 URL
68 | * @param text 原始文本
69 | * @return 结果
70 | * @since 0.25.0
71 | */
72 | protected boolean isUrl(final String text) {
73 | return RegexUtil.isUrl(text);
74 | }
75 |
76 | }
77 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckIPV4.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.check;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.heaven.util.lang.CharUtil;
5 | import com.github.houbb.heaven.util.lang.StringUtil;
6 | import com.github.houbb.heaven.util.util.regex.RegexUtil;
7 | import com.github.houbb.sensitive.word.api.IWordCheck;
8 | import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
9 | import com.github.houbb.sensitive.word.constant.WordConst;
10 | import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
11 | import com.github.houbb.sensitive.word.utils.InnerCharUtils;
12 |
13 | import java.util.List;
14 |
15 | /**
16 | * IPV4 检测
17 | *
18 | * @author binbin.hou
19 | * @since 0.17.0
20 | */
21 | @ThreadSafe
22 | public class WordCheckIPV4 extends AbstractConditionWordCheck {
23 |
24 | private static final IWordCheck INSTANCE = new WordCheckIPV4();
25 |
26 | public static IWordCheck getInstance() {
27 | return INSTANCE;
28 | }
29 |
30 | @Override
31 | protected Class extends IWordCheck> getSensitiveCheckClass() {
32 | return WordCheckIPV4.class;
33 | }
34 |
35 | @Override
36 | protected String getType() {
37 | return WordTypeEnum.IPV4.getCode();
38 | }
39 |
40 | @Override
41 | protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
42 | return CharUtil.isNumber(mappingChar) || '.' == mappingChar;
43 | }
44 |
45 | @Override
46 | protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) {
47 | int bufferLen = stringBuilder.length();
48 | //0.0.0.0
49 | //255.255.255.255
50 | if(bufferLen < 7
51 | || bufferLen > 15) {
52 | return false;
53 | }
54 |
55 | // 尽可能减少对象的创建
56 | String string = stringBuilder.toString();
57 | List stringList = StringUtil.splitToList(string, '.');
58 | if(stringList.size() != 4) {
59 | return false;
60 | }
61 |
62 | for(String numStr : stringList) {
63 | int integer = InnerCharUtils.parseInt(numStr);
64 |
65 | if(integer < 0 || integer > 256) {
66 | return false;
67 | }
68 | }
69 |
70 | // 额外处理
71 | return true;
72 | }
73 |
74 | }
75 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsAllowTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordAllow;
4 | import com.github.houbb.sensitive.word.api.IWordDeny;
5 | import com.github.houbb.sensitive.word.support.allow.WordAllows;
6 | import com.github.houbb.sensitive.word.support.deny.WordDenys;
7 | import com.github.houbb.sensitive.word.support.replace.WordReplaces;
8 | import org.junit.Assert;
9 | import org.junit.Test;
10 |
11 | import java.util.Arrays;
12 | import java.util.List;
13 |
14 | /**
15 | * project: sensitive-word-SensitiveWordBsTest
16 | * create on 2020/1/7 23:43
17 | *
18 | * @author Administrator
19 | * @since 0.21.0
20 | */
21 | public class SensitiveWordBsAllowTest {
22 |
23 | /**
24 | * 是否包含
25 | *
26 | * https://github.com/houbb/sensitive-word/issues/76
27 | *
28 | * @since 0.0.1
29 | */
30 | @Test
31 | public void findAllowTest() {
32 | final String text = "三黄片黄片";
33 |
34 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
35 | .wordAllow(new IWordAllow() {
36 | @Override
37 | public List allow() {
38 | return Arrays.asList("三黄片");
39 | }
40 | })
41 | .init();
42 |
43 | Assert.assertEquals("[黄片]", sensitiveWordBs.findAll(text).toString());
44 | }
45 |
46 | /**
47 | * https://github.com/houbb/sensitive-word/issues/19
48 | *
49 | * @since 0.21.0
50 | */
51 | @Test
52 | public void bug19FixTest() {
53 | final String text = "共产党是白名单不会被检测";
54 | final String text2 = "共产党是白名单不会被检测,但是共产是黑名单";
55 |
56 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
57 | .wordAllow(new IWordAllow() {
58 | @Override
59 | public List allow() {
60 | return Arrays.asList("共产党");
61 | }
62 | })
63 | .wordDeny(new IWordDeny() {
64 | @Override
65 | public List deny() {
66 | return Arrays.asList("政府", "国家", "共产");
67 | }
68 | })
69 | .init();
70 |
71 | Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
72 | Assert.assertEquals("[共产]", sensitiveWordBs.findAll(text2).toString());
73 | }
74 |
75 | }
76 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordFormat;
6 |
7 | import java.util.HashMap;
8 | import java.util.Map;
9 |
10 | /**
11 | * 忽略数字的样式
12 | * @author binbin.hou
13 | * @since 0.0.5
14 | */
15 | @Deprecated
16 | @ThreadSafe
17 | public class WordFormatIgnoreNumStyle implements IWordFormat {
18 |
19 | private static final IWordFormat INSTANCE = new WordFormatIgnoreNumStyle();
20 |
21 | public static IWordFormat getInstance() {
22 | return INSTANCE;
23 | }
24 |
25 | private static final String NUM_ONE = "⓪0零º₀⓿○" +
26 | "123456789" +
27 | "一二三四五六七八九" +
28 | "壹贰叁肆伍陆柒捌玖" +
29 | "¹²³⁴⁵⁶⁷⁸⁹" +
30 | "₁₂₃₄₅₆₇₈₉" +
31 | "①②③④⑤⑥⑦⑧⑨" +
32 | "⑴⑵⑶⑷⑸⑹⑺⑻⑼" +
33 | "⒈⒉⒊⒋⒌⒍⒎⒏⒐" +
34 | "❶❷❸❹❺❻❼❽❾" +
35 | "➀➁➂➃➄➅➆➇➈" +
36 | "➊➋➌➍➎➏➐➑➒" +
37 | "㈠㈡㈢㈣㈤㈥㈦㈧㈨" +
38 | "⓵⓶⓷⓸⓹⓺⓻⓼⓽" +
39 | "㊀㊁㊂㊃㊄㊅㊆㊇㊈" +
40 | "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" +
41 | "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";
42 |
43 | private static final String NUM_TWO = "0000000"+
44 | "123456789" +
45 | "123456789" +
46 | "123456789" +
47 | "123456789" +
48 | "123456789" +
49 | "123456789" +
50 | "123456789" +
51 | "123456789" +
52 | "123456789" +
53 | "123456789" +
54 | "123456789" +
55 | "123456789" +
56 | "123456789" +
57 | "123456789" +
58 | "123456789" +
59 | "123456789";
60 |
61 | private static final Map NUMBER_MAP = new HashMap<>(NUM_ONE.length());
62 |
63 | static {
64 | final int size = NUM_ONE.length();
65 | for(int i = 0; i < size; i++) {
66 | NUMBER_MAP.put(NUM_ONE.charAt(i), NUM_TWO.charAt(i));
67 | }
68 | }
69 |
70 | /**
71 | * 映射后的 char
72 | * @param c 待转换的 char
73 | * @return 结果
74 | * @since 0.0.4
75 | */
76 | private char getMappingChar(final char c) {
77 | Character mapChar = NUMBER_MAP.get(c);
78 | return mapChar == null ? c : mapChar;
79 | }
80 |
81 | @Override
82 | public char format(char original, IWordContext context) {
83 | return getMappingChar(original);
84 | }
85 |
86 | }
87 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyleC2C.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.format;
2 |
3 | import com.github.houbb.heaven.annotation.ThreadSafe;
4 | import com.github.houbb.sensitive.word.api.IWordContext;
5 | import com.github.houbb.sensitive.word.api.IWordFormat;
6 | import com.github.houbb.sensitive.word.collection.Char2CharMap;
7 |
8 | import java.util.HashMap;
9 | import java.util.Map;
10 |
11 | /**
12 | * 忽略数字的样式
13 | * @author binbin.hou
14 | * @since 0.0.5
15 | */
16 | @ThreadSafe
17 | public class WordFormatIgnoreNumStyleC2C implements IWordFormat {
18 |
19 | private static final IWordFormat INSTANCE = new WordFormatIgnoreNumStyleC2C();
20 |
21 | public static IWordFormat getInstance() {
22 | return INSTANCE;
23 | }
24 |
25 | private static final String NUM_ONE = "⓪0零º₀⓿○" +
26 | "123456789" +
27 | "一二三四五六七八九" +
28 | "壹贰叁肆伍陆柒捌玖" +
29 | "¹²³⁴⁵⁶⁷⁸⁹" +
30 | "₁₂₃₄₅₆₇₈₉" +
31 | "①②③④⑤⑥⑦⑧⑨" +
32 | "⑴⑵⑶⑷⑸⑹⑺⑻⑼" +
33 | "⒈⒉⒊⒋⒌⒍⒎⒏⒐" +
34 | "❶❷❸❹❺❻❼❽❾" +
35 | "➀➁➂➃➄➅➆➇➈" +
36 | "➊➋➌➍➎➏➐➑➒" +
37 | "㈠㈡㈢㈣㈤㈥㈦㈧㈨" +
38 | "⓵⓶⓷⓸⓹⓺⓻⓼⓽" +
39 | "㊀㊁㊂㊃㊄㊅㊆㊇㊈" +
40 | "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" +
41 | "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";
42 |
43 | private static final String NUM_TWO = "0000000"+
44 | "123456789" +
45 | "123456789" +
46 | "123456789" +
47 | "123456789" +
48 | "123456789" +
49 | "123456789" +
50 | "123456789" +
51 | "123456789" +
52 | "123456789" +
53 | "123456789" +
54 | "123456789" +
55 | "123456789" +
56 | "123456789" +
57 | "123456789" +
58 | "123456789" +
59 | "123456789";
60 |
61 | private static final Char2CharMap NUMBER_MAP = new Char2CharMap(NUM_ONE.length());
62 |
63 | static {
64 | final int size = NUM_ONE.length();
65 | for(int i = 0; i < size; i++) {
66 | NUMBER_MAP.put(NUM_ONE.charAt(i), NUM_TWO.charAt(i));
67 | }
68 | }
69 |
70 | /**
71 | * 映射后的 char
72 | * @param c 待转换的 char
73 | * @return 结果
74 | * @since 0.0.4
75 | */
76 | private char getMappingChar(final char c) {
77 | char mc = NUMBER_MAP.get(c);
78 | return mc == 0 ? c : mc;
79 | }
80 |
81 | @Override
82 | public char format(char original, IWordContext context) {
83 | return getMappingChar(original);
84 | }
85 |
86 | }
87 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.utils;
2 |
3 | import com.github.houbb.heaven.util.lang.StringUtil;
4 | import com.github.houbb.heaven.util.util.CollectionUtil;
5 | import com.github.houbb.sensitive.word.api.IWordFormat;
6 | import com.github.houbb.sensitive.word.api.IWordContext;
7 |
8 | import java.util.*;
9 |
10 | /**
11 | * 内部格式化工具类
12 | * @since 0.1.1
13 | */
14 | public final class InnerWordFormatUtils {
15 |
16 | private InnerWordFormatUtils(){}
17 |
18 | /**
19 | * 空字符数组
20 | * @since 0.6.0
21 | */
22 | private static final char[] EMPTY_CHARS = new char[0];
23 |
24 | /**
25 | * 格式化
26 | * @param original 原始
27 | * @param context 上下文
28 | * @return 结果
29 | * @since 0.1.1
30 | */
31 | public static String format(final String original, final IWordContext context) {
32 | if(StringUtil.isEmpty(original)) {
33 | return original;
34 | }
35 |
36 | StringBuilder stringBuilder = new StringBuilder();
37 | IWordFormat charFormat = context.wordFormat();
38 | int len = original.length();
39 | for(int i = 0; i < len; i++) {
40 | char c = original.charAt(i);
41 | char cf = charFormat.format(c, context);
42 | stringBuilder.append(cf);
43 | }
44 |
45 | return stringBuilder.toString();
46 | }
47 |
48 | /**
49 | * 字符串统一的格式化处理
50 | *
51 | * 注意:这个需要 map 的实现是 {@link it.unimi.dsi.fastutil.chars.Char2CharOpenHashMap}
52 | * @param map 映射集合
53 | * @param c 原始
54 | * @return 结果
55 | * @since 0.28.0
56 | */
57 | public static char getMappingChar(final Map map, char c) {
58 | //Char2CharOpenHashMap 不存在映射也是返回 null
59 | Object mc = map.get(c);
60 | if(mc == null) {
61 | return c;
62 | }
63 | return (char) mc;
64 | }
65 |
66 | /**
67 | * 格式化列表
68 | * @param list 列表
69 | * @param context 上下文
70 | * @return 结果
71 | * @since 0。3.0
72 | */
73 | public static List formatWordList(Collection list,
74 | final IWordContext context) {
75 | if(CollectionUtil.isEmpty(list)) {
76 | return new ArrayList<>();
77 | }
78 |
79 | List resultList = new ArrayList<>(list.size());
80 | for(String word : list) {
81 | String formatWord = InnerWordFormatUtils.format(word, context);
82 | resultList.add(formatWord);
83 | }
84 |
85 | return resultList;
86 | }
87 |
88 | }
89 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.benchmark;
2 |
3 | import com.github.houbb.heaven.util.util.RandomUtil;
4 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
5 | import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
6 | import org.junit.Ignore;
7 | import org.junit.Test;
8 |
9 | @Ignore
10 | public class BenchmarkTimesTest {
11 |
12 | /**
13 | * 测试基准:100+字符串 * 10W次
14 | *
15 | * V0.6.0: 1470ms,接近 7.2W QPS
16 | * V0.7.0: 1380ms
17 | * v0.29.2: 781ms,接近 14W QPS
18 | */
19 | @Test
20 | public void onlyWordAndNoReplaceTest() {
21 | // 1W 次
22 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
23 | .enableWordCheck(true)
24 | .enableNumCheck(false)
25 | .enableUrlCheck(false)
26 | .enableEmailCheck(false)
27 | .ignoreRepeat(false)
28 | .ignoreCase(false)
29 | .ignoreNumStyle(false)
30 | .ignoreChineseStyle(false)
31 | .ignoreEnglishStyle(false)
32 | .ignoreWidth(false)
33 | .init();
34 |
35 | String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
36 | + "我们他妈的从来不说脏说";
37 |
38 | long start = System.currentTimeMillis();
39 | for(int i = 0; i < 100_000; i++) {
40 | sensitiveWordBs.findAll(randomText);
41 | }
42 | long end = System.currentTimeMillis();
43 | System.out.println("------------------ COST: " + (end-start));
44 | }
45 |
46 | /**
47 | * 测试基准:100+字符串 * 10W次
48 | *
49 | * V0.6.0: 2744ms, 约 3.7W QPS
50 | * V0.7.0: 2723ms
51 | * V0.29.2: 1588ms,约 6.29W QPS
52 | */
53 | @Test
54 | public void onlyWordAndWithReplaceTest() {
55 | // 1W 次
56 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
57 | .enableWordCheck(true)
58 | .enableNumCheck(false)
59 | .enableUrlCheck(false)
60 | .enableEmailCheck(false)
61 | .ignoreRepeat(true)
62 | .ignoreCase(true)
63 | .ignoreNumStyle(true)
64 | .ignoreChineseStyle(true)
65 | .ignoreEnglishStyle(true)
66 | .ignoreWidth(true)
67 | .init();
68 |
69 | String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
70 | + "我们他妈的从来不说脏说";
71 |
72 | long start = System.currentTimeMillis();
73 | for(int i = 0; i < 100_000; i++) {
74 | sensitiveWordBs.findAll(randomText);
75 | }
76 | long end = System.currentTimeMillis();
77 | System.out.println("------------------ COST: " + (end-start));
78 | }
79 |
80 | }
81 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTagTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.bs;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordDeny;
4 | import com.github.houbb.sensitive.word.api.IWordTag;
5 | import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
6 | import com.github.houbb.sensitive.word.support.result.WordTagsDto;
7 | import com.github.houbb.sensitive.word.support.tag.WordTags;
8 | import org.junit.Assert;
9 | import org.junit.Test;
10 |
11 | import java.util.Arrays;
12 | import java.util.List;
13 |
14 | /**
15 | * project: sensitive-word-SensitiveWordBsTest
16 | * create on 2020/1/7 23:43
17 | *
18 | * @author Administrator
19 | * @since 0.10.0
20 | */
21 | public class SensitiveWordBsTagTest {
22 |
23 | @Test
24 | public void wordResultHandlerWordTagsTest() {
25 | // 自定义测试标签类
26 | IWordTag wordTag = WordTags.lines(Arrays.asList("0售 广告"));
27 |
28 | // 指定初始化
29 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
30 | .wordDeny(new IWordDeny() {
31 | @Override
32 | public List deny() {
33 | return Arrays.asList("0售");
34 | }
35 | })
36 | .wordTag(wordTag)
37 | .init()
38 | ;
39 | List wordTagsDtoList1 = sensitiveWordBs.findAll("零售", WordResultHandlers.wordTags());
40 | Assert.assertEquals("[WordTagsDto{word='零售', tags=[广告]}]", wordTagsDtoList1.toString());
41 |
42 | List wordTagsDtoList2 = sensitiveWordBs.findAll("0售", WordResultHandlers.wordTags());
43 | Assert.assertEquals("[WordTagsDto{word='0售', tags=[广告]}]", wordTagsDtoList2.toString());
44 | }
45 |
46 | @Test
47 | public void wordResultHandlerWordTags2Test() {
48 | // 自定义测试标签类
49 | IWordTag wordTag = WordTags.lines(Arrays.asList("天安门 政治,国家,地址"));
50 |
51 | // 指定初始化
52 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
53 | .wordTag(wordTag)
54 | .init()
55 | ;
56 | List wordTagsDtoList1 = sensitiveWordBs.findAll("天安门", WordResultHandlers.wordTags());
57 | Assert.assertEquals("[WordTagsDto{word='天安门', tags=[政治, 国家, 地址]}]", wordTagsDtoList1.toString());
58 | }
59 |
60 | @Test
61 | public void wordTagsTest() {
62 | // 自定义测试标签类
63 | IWordTag wordTag = WordTags.lines(Arrays.asList("0售 广告", "天安门 政治,国家,地址"));
64 | // 指定初始化
65 | SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
66 | .wordTag(wordTag)
67 | .init()
68 | ;
69 |
70 | Assert.assertEquals("[政治, 国家, 地址]", sensitiveWordBs.tags("天安门").toString());
71 | Assert.assertEquals("[广告]", sensitiveWordBs.tags("零售").toString());
72 | Assert.assertEquals("[广告]", sensitiveWordBs.tags("0售").toString());
73 | }
74 |
75 | }
76 |
--------------------------------------------------------------------------------
/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.support.handler;
2 |
3 | import com.github.houbb.sensitive.word.api.IWordResult;
4 | import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
5 | import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
6 | import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
7 | import com.github.houbb.sensitive.word.support.result.WordTagsDto;
8 | import com.github.houbb.sensitive.word.support.tag.WordTags;
9 | import org.junit.Assert;
10 | import org.junit.Ignore;
11 | import org.junit.Test;
12 |
13 | import java.util.*;
14 |
15 | /**
16 | * @since 0.12.0
17 | */
18 | public class WordResultHandlerTest {
19 |
20 | @Test
21 | public void findAllWordTest() {
22 | final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
23 |
24 | List wordList = SensitiveWordHelper.findAll(text);
25 | Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
26 | List wordList2 = SensitiveWordHelper.findAll(text, WordResultHandlers.word());
27 | Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString());
28 |
29 | List wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
30 | Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList3.toString());
31 | }
32 |
33 | @Test
34 | public void findAllWordTest2() {
35 | final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com";
36 | List wordList3 = SensitiveWordHelper
37 | .findAll(text, WordResultHandlers.raw());
38 | Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD', word='你他妈'}]", wordList3.toString());
39 | }
40 |
41 | @Test
42 | public void wordTagsTest() {
43 | final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
44 |
45 | // 默认敏感词标签为空
46 | List wordList1 = SensitiveWordHelper.findAll(text, WordResultHandlers.wordTags());
47 | Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=null}, WordTagsDto{word='毛主席', tags=[0]}, WordTagsDto{word='天安门', tags=null}]", wordList1.toString());
48 |
49 | Map> wordMap = new HashMap<>();
50 | wordMap.put("五星红旗", new HashSet<>(Arrays.asList("政治", "国家")));
51 | wordMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "伟人", "国家")));
52 | wordMap.put("天安门", new HashSet<>(Arrays.asList("政治", "国家", "地址")));
53 |
54 | List wordList2 = SensitiveWordBs.newInstance()
55 | .wordTag(WordTags.map(wordMap))
56 | .init()
57 | .findAll(text, WordResultHandlers.wordTags());
58 | Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[政治, 国家]}, WordTagsDto{word='毛主席', tags=[政治, 伟人, 国家]}, WordTagsDto{word='天安门', tags=[政治, 国家, 地址]}]", wordList2.toString());
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/src/main/java/com/github/houbb/sensitive/word/collection/Char2CharMap.java:
--------------------------------------------------------------------------------
1 | package com.github.houbb.sensitive.word.collection;
2 |
/**
 * Open-addressing {@code char -> char} hash map backed by primitive arrays, so lookups and
 * inserts involve no boxing or unboxing.
 *
 * <p>Collisions are resolved by linear probing over a power-of-two sized table. The key
 * {@code '\0'} marks an empty slot and therefore cannot be stored. The table doubles once the
 * number of entries reaches half of its capacity. This class performs no synchronization.</p>
 *
 * @since 0.29.2
 */
public final class Char2CharMap {

    /** Sentinel stored in {@code keys} for a slot that holds no entry. */
    private static final char EMPTY_KEY = '\0';
    private static final float LOAD_FACTOR = 0.5f;

    private char[] keys;
    private char[] values;
    private int size;
    /** Always {@code capacity - 1}; lets {@code & mask} replace a modulo. */
    private int mask;
    /** Entry count at which the table grows. */
    private int maxSize;

    /**
     * Creates a map sized so that {@code expectedSize} entries fit without growing.
     *
     * @param expectedSize number of entries the caller plans to insert
     */
    public Char2CharMap(int expectedSize) {
        final int slots = tableSizeFor((int) (expectedSize / LOAD_FACTOR) + 1);
        this.keys = new char[slots];
        this.values = new char[slots];
        this.mask = slots - 1;
        this.maxSize = (int) (slots * LOAD_FACTOR);
        this.size = 0;
    }

    /** Rounds up to a power of two, clamped to the range {@code [2, 1 << 30]}. */
    private static int tableSizeFor(int cap) {
        final int ceiling = 1 << 30;
        int bits = cap - 1;
        // Smear the highest set bit into every lower position.
        for (int shift = 1; shift <= 16; shift <<= 1) {
            bits |= bits >>> shift;
        }
        if (bits < 2) {
            return 2;
        }
        if (bits >= ceiling) {
            return ceiling;
        }
        return bits + 1;
    }

    /** Multiplicative hash reduced to a table index. */
    private int hash(char k) {
        return (k * 0x9E3779B9) & mask;
    }

    /**
     * Inserts a mapping, or overwrites the value of an existing key.
     *
     * @param key   key to store; must not be {@code '\0'}
     * @param value value to associate with the key
     * @throws IllegalArgumentException if {@code key} is the reserved empty marker
     */
    public void put(char key, char value) {
        if (key == EMPTY_KEY) {
            throw new IllegalArgumentException("Key '\0' is reserved as EMPTY_KEY.");
        }
        for (int slot = hash(key); ; slot = (slot + 1) & mask) {
            final char occupant = keys[slot];
            if (occupant == key) {
                values[slot] = value;
                return;
            }
            if (occupant == EMPTY_KEY) {
                keys[slot] = key;
                values[slot] = value;
                size++;
                if (size >= maxSize) {
                    resize();
                }
                return;
            }
        }
    }

    /**
     * Looks up a key.
     *
     * @param key          key to look up
     * @param defaultValue value returned when the key is absent (or is {@code '\0'})
     * @return the stored value, or {@code defaultValue}
     */
    public char get(char key, char defaultValue) {
        if (key == EMPTY_KEY) {
            return defaultValue;
        }
        for (int slot = hash(key); ; slot = (slot + 1) & mask) {
            final char occupant = keys[slot];
            if (occupant == EMPTY_KEY) {
                return defaultValue;
            }
            if (occupant == key) {
                return values[slot];
            }
        }
    }

    /**
     * Looks up a key, yielding {@code '\0'} when it is absent.
     *
     * @param key key to look up
     * @return the stored value, or {@code '\0'}
     */
    public char get(char key) {
        return get(key, '\0');
    }

    /** Doubles the table and re-inserts every existing entry. */
    private void resize() {
        final char[] previousKeys = keys;
        final char[] previousValues = values;
        final int doubled = previousKeys.length << 1;

        keys = new char[doubled];
        values = new char[doubled];
        mask = doubled - 1;
        maxSize = (int) (doubled * LOAD_FACTOR);
        // put() re-counts the entries while they are re-inserted.
        size = 0;

        int slot = 0;
        for (char occupant : previousKeys) {
            if (occupant != EMPTY_KEY) {
                put(occupant, previousValues[slot]);
            }
            slot++;
        }
    }

    /**
     * @return the number of stored entries
     */
    public int size() {
        return size;
    }
}
105 |
106 |
107 |
--------------------------------------------------------------------------------