pull/59/MERGE
parent
e8da08a2ea
commit
0db6a80044
|
@ -91,8 +91,8 @@ public interface SensitiveWordService {
|
||||||
* 判断文本是否包含敏感词
|
* 判断文本是否包含敏感词
|
||||||
*
|
*
|
||||||
* @param text 文本
|
* @param text 文本
|
||||||
* @param tags 表述数组
|
* @param tags 标签数组
|
||||||
* @return 是否包含
|
* @return 是否包含敏感词
|
||||||
*/
|
*/
|
||||||
boolean isTextValid(String text, List<String> tags);
|
boolean isTextValid(String text, List<String> tags);
|
||||||
|
|
||||||
|
|
|
@ -258,6 +258,7 @@ public class SensitiveWordServiceImpl implements SensitiveWordService {
|
||||||
if (trie == null) {
|
if (trie == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
// 如果有一个标签不合法,则返回 false 不合法
|
||||||
if (!trie.isValid(text)) {
|
if (!trie.isValid(text)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,9 +30,10 @@ public class SimpleTrie {
|
||||||
* @param strs 字符串数组
|
* @param strs 字符串数组
|
||||||
*/
|
*/
|
||||||
public SimpleTrie(Collection<String> strs) {
|
public SimpleTrie(Collection<String> strs) {
|
||||||
children = new HashMap<>();
|
// 排序,优先使用较短的前缀
|
||||||
|
strs = CollUtil.sort(strs, String::compareTo);
|
||||||
// 构建树
|
// 构建树
|
||||||
CollUtil.sort(strs, String::compareTo); // 排序,优先使用较短的前缀
|
children = new HashMap<>();
|
||||||
for (String str : strs) {
|
for (String str : strs) {
|
||||||
Map<Character, Object> child = children;
|
Map<Character, Object> child = children;
|
||||||
// 遍历每个字符
|
// 遍历每个字符
|
||||||
|
@ -56,11 +57,11 @@ public class SimpleTrie {
|
||||||
* 验证文本是否合法,即不包含敏感词
|
* 验证文本是否合法,即不包含敏感词
|
||||||
*
|
*
|
||||||
* @param text 文本
|
* @param text 文本
|
||||||
* @return 是否 ok
|
* @return 是否 true-合法 false-不合法
|
||||||
*/
|
*/
|
||||||
public boolean isValid(String text) {
|
public boolean isValid(String text) {
|
||||||
// 遍历 text,使用每一个 [i, n) 段的字符串,使用 children 前缀树匹配,是否包含敏感词
|
// 遍历 text,使用每一个 [i, n) 段的字符串,使用 children 前缀树匹配,是否包含敏感词
|
||||||
for (int i = 0; i < text.length() - 1; i++) {
|
for (int i = 0; i < text.length(); i++) {
|
||||||
Map<Character, Object> child = (Map<Character, Object>) children.get(text.charAt(i));
|
Map<Character, Object> child = (Map<Character, Object>) children.get(text.charAt(i));
|
||||||
if (child == null) {
|
if (child == null) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -74,14 +75,17 @@ public class SimpleTrie {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 验证文本从指定位置开始,是否包含某个敏感词
|
* 验证文本从指定位置开始,是否不包含某个敏感词
|
||||||
*
|
*
|
||||||
* @param text 文本
|
* @param text 文本
|
||||||
* @param index 开始位置
|
* @param index 开始位置
|
||||||
* @param child 节点(当前遍历到的)
|
* @param child 节点(当前遍历到的)
|
||||||
* @return 是否包含
|
* @return 是否不包含 true-不包含 false-包含
|
||||||
*/
|
*/
|
||||||
private boolean recursion(String text, int index, Map<Character, Object> child) {
|
private boolean recursion(String text, int index, Map<Character, Object> child) {
|
||||||
|
if (child.containsKey(CHARACTER_END)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (index == text.length()) {
|
if (index == text.length()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -99,7 +103,7 @@ public class SimpleTrie {
|
||||||
*/
|
*/
|
||||||
public List<String> validate(String text) {
|
public List<String> validate(String text) {
|
||||||
Set<String> results = new HashSet<>();
|
Set<String> results = new HashSet<>();
|
||||||
for (int i = 0; i < text.length() - 1; i++) {
|
for (int i = 0; i < text.length(); i++) {
|
||||||
Character c = text.charAt(i);
|
Character c = text.charAt(i);
|
||||||
Map<Character, Object> child = (Map<Character, Object>) children.get(c);
|
Map<Character, Object> child = (Map<Character, Object>) children.get(c);
|
||||||
if (child == null) {
|
if (child == null) {
|
||||||
|
@ -127,6 +131,9 @@ public class SimpleTrie {
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
private static boolean recursionWithResult(String text, int index, Map<Character, Object> child, StringBuilder result) {
|
private static boolean recursionWithResult(String text, int index, Map<Character, Object> child, StringBuilder result) {
|
||||||
|
if (child.containsKey(CHARACTER_END)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (index == text.length()) {
|
if (index == text.length()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,20 +56,28 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
|
||||||
SensitiveWordDO wordDO2 = randomPojo(SensitiveWordDO.class, o -> o.setName("笨蛋")
|
SensitiveWordDO wordDO2 = randomPojo(SensitiveWordDO.class, o -> o.setName("笨蛋")
|
||||||
.setTags(singletonList("蔬菜")).setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
.setTags(singletonList("蔬菜")).setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
||||||
sensitiveWordMapper.insert(wordDO2);
|
sensitiveWordMapper.insert(wordDO2);
|
||||||
|
SensitiveWordDO wordDO3 = randomPojo(SensitiveWordDO.class, o -> o.setName("白")
|
||||||
|
.setTags(singletonList("测试")).setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
||||||
|
sensitiveWordMapper.insert(wordDO3);
|
||||||
|
SensitiveWordDO wordDO4 = randomPojo(SensitiveWordDO.class, o -> o.setName("白痴")
|
||||||
|
.setTags(singletonList("测试")).setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
||||||
|
sensitiveWordMapper.insert(wordDO4);
|
||||||
|
|
||||||
// 调用
|
// 调用
|
||||||
sensitiveWordService.initLocalCache();
|
sensitiveWordService.initLocalCache();
|
||||||
// 断言 sensitiveWordTagsCache 缓存
|
// 断言 sensitiveWordTagsCache 缓存
|
||||||
assertEquals(SetUtils.asSet("论坛", "蔬菜"), sensitiveWordService.getSensitiveWordTagSet());
|
assertEquals(SetUtils.asSet("论坛", "蔬菜", "测试"), sensitiveWordService.getSensitiveWordTagSet());
|
||||||
// 断言 sensitiveWordCache
|
// 断言 sensitiveWordCache
|
||||||
assertEquals(2, sensitiveWordService.getSensitiveWordCache().size());
|
assertEquals(4, sensitiveWordService.getSensitiveWordCache().size());
|
||||||
assertPojoEquals(wordDO1, sensitiveWordService.getSensitiveWordCache().get(0));
|
assertPojoEquals(wordDO1, sensitiveWordService.getSensitiveWordCache().get(0));
|
||||||
assertPojoEquals(wordDO2, sensitiveWordService.getSensitiveWordCache().get(1));
|
assertPojoEquals(wordDO2, sensitiveWordService.getSensitiveWordCache().get(1));
|
||||||
|
assertPojoEquals(wordDO3, sensitiveWordService.getSensitiveWordCache().get(2));
|
||||||
// 断言 tagSensitiveWordTries 缓存
|
// 断言 tagSensitiveWordTries 缓存
|
||||||
assertNotNull(sensitiveWordService.getDefaultSensitiveWordTrie());
|
assertNotNull(sensitiveWordService.getDefaultSensitiveWordTrie());
|
||||||
assertEquals(2, sensitiveWordService.getTagSensitiveWordTries().size());
|
assertEquals(3, sensitiveWordService.getTagSensitiveWordTries().size());
|
||||||
assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("论坛"));
|
assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("论坛"));
|
||||||
assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("蔬菜"));
|
assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("蔬菜"));
|
||||||
|
assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("测试"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -231,11 +239,17 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
|
||||||
testInitLocalCache();
|
testInitLocalCache();
|
||||||
// 准备参数
|
// 准备参数
|
||||||
String text = "你是傻瓜,你是笨蛋";
|
String text = "你是傻瓜,你是笨蛋";
|
||||||
|
|
||||||
// 调用
|
// 调用
|
||||||
List<String> result = sensitiveWordService.validateText(text, null);
|
List<String> result = sensitiveWordService.validateText(text, null);
|
||||||
// 断言
|
// 断言
|
||||||
assertEquals(Arrays.asList("傻瓜", "笨蛋"), result);
|
assertEquals(Arrays.asList("傻瓜", "笨蛋"), result);
|
||||||
|
|
||||||
|
// 准备参数
|
||||||
|
String text2 = "你是傻瓜,你是笨蛋,你是白";
|
||||||
|
// 调用
|
||||||
|
List<String> result2 = sensitiveWordService.validateText(text2, null);
|
||||||
|
// 断言
|
||||||
|
assertEquals(Arrays.asList("傻瓜", "笨蛋","白"), result2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -243,11 +257,18 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
|
||||||
testInitLocalCache();
|
testInitLocalCache();
|
||||||
// 准备参数
|
// 准备参数
|
||||||
String text = "你是傻瓜,你是笨蛋";
|
String text = "你是傻瓜,你是笨蛋";
|
||||||
|
|
||||||
// 调用
|
// 调用
|
||||||
List<String> result = sensitiveWordService.validateText(text, singletonList("论坛"));
|
List<String> result = sensitiveWordService.validateText(text, singletonList("论坛"));
|
||||||
// 断言
|
// 断言
|
||||||
assertEquals(singletonList("傻瓜"), result);
|
assertEquals(singletonList("傻瓜"), result);
|
||||||
|
|
||||||
|
|
||||||
|
// 准备参数
|
||||||
|
String text2 = "你是白";
|
||||||
|
// 调用
|
||||||
|
List<String> result2 = sensitiveWordService.validateText(text2, singletonList("测试"));
|
||||||
|
// 断言
|
||||||
|
assertEquals(singletonList("白"), result2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -255,9 +276,13 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
|
||||||
testInitLocalCache();
|
testInitLocalCache();
|
||||||
// 准备参数
|
// 准备参数
|
||||||
String text = "你是傻瓜,你是笨蛋";
|
String text = "你是傻瓜,你是笨蛋";
|
||||||
|
|
||||||
// 调用,断言
|
// 调用,断言
|
||||||
assertFalse(sensitiveWordService.isTextValid(text, null));
|
assertFalse(sensitiveWordService.isTextValid(text, null));
|
||||||
|
|
||||||
|
// 准备参数
|
||||||
|
String text2 = "你是白";
|
||||||
|
// 调用,断言
|
||||||
|
assertFalse(sensitiveWordService.isTextValid(text2, null));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -265,9 +290,13 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
|
||||||
testInitLocalCache();
|
testInitLocalCache();
|
||||||
// 准备参数
|
// 准备参数
|
||||||
String text = "你是傻瓜,你是笨蛋";
|
String text = "你是傻瓜,你是笨蛋";
|
||||||
|
|
||||||
// 调用,断言
|
// 调用,断言
|
||||||
assertFalse(sensitiveWordService.isTextValid(text, singletonList("论坛")));
|
assertFalse(sensitiveWordService.isTextValid(text, singletonList("论坛")));
|
||||||
|
|
||||||
|
// 准备参数
|
||||||
|
String text2 = "你是白";
|
||||||
|
// 调用,断言
|
||||||
|
assertFalse(sensitiveWordService.isTextValid(text2, singletonList("测试")));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue