YunaiV 2023-10-19 23:28:57 +08:00
parent e8da08a2ea
commit 0db6a80044
4 changed files with 53 additions and 16 deletions

View File

@ -91,8 +91,8 @@ public interface SensitiveWordService {
* *
* *
* @param text * @param text
* @param tags * @param tags
* @return * @return
*/ */
boolean isTextValid(String text, List<String> tags); boolean isTextValid(String text, List<String> tags);

View File

@ -258,6 +258,7 @@ public class SensitiveWordServiceImpl implements SensitiveWordService {
if (trie == null) { if (trie == null) {
continue; continue;
} }
// 如果有一个标签不合法,则返回 false 不合法
if (!trie.isValid(text)) { if (!trie.isValid(text)) {
return false; return false;
} }

View File

@ -30,9 +30,10 @@ public class SimpleTrie {
* @param strs * @param strs
*/ */
public SimpleTrie(Collection<String> strs) { public SimpleTrie(Collection<String> strs) {
children = new HashMap<>(); // 排序,优先使用较短的前缀
strs = CollUtil.sort(strs, String::compareTo);
// 构建树 // 构建树
CollUtil.sort(strs, String::compareTo); // 排序,优先使用较短的前缀 children = new HashMap<>();
for (String str : strs) { for (String str : strs) {
Map<Character, Object> child = children; Map<Character, Object> child = children;
// 遍历每个字符 // 遍历每个字符
@ -56,11 +57,11 @@ public class SimpleTrie {
* *
* *
* @param text * @param text
* @return ok * @return true- false-
*/ */
public boolean isValid(String text) { public boolean isValid(String text) {
// 遍历 text使用每一个 [i, n) 段的字符串,使用 children 前缀树匹配,是否包含敏感词 // 遍历 text使用每一个 [i, n) 段的字符串,使用 children 前缀树匹配,是否包含敏感词
for (int i = 0; i < text.length() - 1; i++) { for (int i = 0; i < text.length(); i++) {
Map<Character, Object> child = (Map<Character, Object>) children.get(text.charAt(i)); Map<Character, Object> child = (Map<Character, Object>) children.get(text.charAt(i));
if (child == null) { if (child == null) {
continue; continue;
@ -74,14 +75,17 @@ public class SimpleTrie {
} }
/** /**
* *
* *
* @param text * @param text
* @param index * @param index
* @param child * @param child
* @return * @return true- false-
*/ */
private boolean recursion(String text, int index, Map<Character, Object> child) { private boolean recursion(String text, int index, Map<Character, Object> child) {
if (child.containsKey(CHARACTER_END)) {
return false;
}
if (index == text.length()) { if (index == text.length()) {
return true; return true;
} }
@ -99,7 +103,7 @@ public class SimpleTrie {
*/ */
public List<String> validate(String text) { public List<String> validate(String text) {
Set<String> results = new HashSet<>(); Set<String> results = new HashSet<>();
for (int i = 0; i < text.length() - 1; i++) { for (int i = 0; i < text.length(); i++) {
Character c = text.charAt(i); Character c = text.charAt(i);
Map<Character, Object> child = (Map<Character, Object>) children.get(c); Map<Character, Object> child = (Map<Character, Object>) children.get(c);
if (child == null) { if (child == null) {
@ -127,6 +131,9 @@ public class SimpleTrie {
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private static boolean recursionWithResult(String text, int index, Map<Character, Object> child, StringBuilder result) { private static boolean recursionWithResult(String text, int index, Map<Character, Object> child, StringBuilder result) {
if (child.containsKey(CHARACTER_END)) {
return false;
}
if (index == text.length()) { if (index == text.length()) {
return true; return true;
} }

View File

@ -56,20 +56,28 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
SensitiveWordDO wordDO2 = randomPojo(SensitiveWordDO.class, o -> o.setName("笨蛋") SensitiveWordDO wordDO2 = randomPojo(SensitiveWordDO.class, o -> o.setName("笨蛋")
.setTags(singletonList("蔬菜")).setStatus(CommonStatusEnum.ENABLE.getStatus())); .setTags(singletonList("蔬菜")).setStatus(CommonStatusEnum.ENABLE.getStatus()));
sensitiveWordMapper.insert(wordDO2); sensitiveWordMapper.insert(wordDO2);
SensitiveWordDO wordDO3 = randomPojo(SensitiveWordDO.class, o -> o.setName("白")
.setTags(singletonList("测试")).setStatus(CommonStatusEnum.ENABLE.getStatus()));
sensitiveWordMapper.insert(wordDO3);
SensitiveWordDO wordDO4 = randomPojo(SensitiveWordDO.class, o -> o.setName("白痴")
.setTags(singletonList("测试")).setStatus(CommonStatusEnum.ENABLE.getStatus()));
sensitiveWordMapper.insert(wordDO4);
// 调用 // 调用
sensitiveWordService.initLocalCache(); sensitiveWordService.initLocalCache();
// 断言 sensitiveWordTagsCache 缓存 // 断言 sensitiveWordTagsCache 缓存
assertEquals(SetUtils.asSet("论坛", "蔬菜"), sensitiveWordService.getSensitiveWordTagSet()); assertEquals(SetUtils.asSet("论坛", "蔬菜", "测试"), sensitiveWordService.getSensitiveWordTagSet());
// 断言 sensitiveWordCache // 断言 sensitiveWordCache
assertEquals(2, sensitiveWordService.getSensitiveWordCache().size()); assertEquals(4, sensitiveWordService.getSensitiveWordCache().size());
assertPojoEquals(wordDO1, sensitiveWordService.getSensitiveWordCache().get(0)); assertPojoEquals(wordDO1, sensitiveWordService.getSensitiveWordCache().get(0));
assertPojoEquals(wordDO2, sensitiveWordService.getSensitiveWordCache().get(1)); assertPojoEquals(wordDO2, sensitiveWordService.getSensitiveWordCache().get(1));
assertPojoEquals(wordDO3, sensitiveWordService.getSensitiveWordCache().get(2));
// 断言 tagSensitiveWordTries 缓存 // 断言 tagSensitiveWordTries 缓存
assertNotNull(sensitiveWordService.getDefaultSensitiveWordTrie()); assertNotNull(sensitiveWordService.getDefaultSensitiveWordTrie());
assertEquals(2, sensitiveWordService.getTagSensitiveWordTries().size()); assertEquals(3, sensitiveWordService.getTagSensitiveWordTries().size());
assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("论坛")); assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("论坛"));
assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("蔬菜")); assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("蔬菜"));
assertNotNull(sensitiveWordService.getTagSensitiveWordTries().get("测试"));
} }
@Test @Test
@ -231,11 +239,17 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
testInitLocalCache(); testInitLocalCache();
// 准备参数 // 准备参数
String text = "你是傻瓜,你是笨蛋"; String text = "你是傻瓜,你是笨蛋";
// 调用 // 调用
List<String> result = sensitiveWordService.validateText(text, null); List<String> result = sensitiveWordService.validateText(text, null);
// 断言 // 断言
assertEquals(Arrays.asList("傻瓜", "笨蛋"), result); assertEquals(Arrays.asList("傻瓜", "笨蛋"), result);
// 准备参数
String text2 = "你是傻瓜,你是笨蛋,你是白";
// 调用
List<String> result2 = sensitiveWordService.validateText(text2, null);
// 断言
assertEquals(Arrays.asList("傻瓜", "笨蛋","白"), result2);
} }
@Test @Test
@ -243,11 +257,18 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
testInitLocalCache(); testInitLocalCache();
// 准备参数 // 准备参数
String text = "你是傻瓜,你是笨蛋"; String text = "你是傻瓜,你是笨蛋";
// 调用 // 调用
List<String> result = sensitiveWordService.validateText(text, singletonList("论坛")); List<String> result = sensitiveWordService.validateText(text, singletonList("论坛"));
// 断言 // 断言
assertEquals(singletonList("傻瓜"), result); assertEquals(singletonList("傻瓜"), result);
// 准备参数
String text2 = "你是白";
// 调用
List<String> result2 = sensitiveWordService.validateText(text2, singletonList("测试"));
// 断言
assertEquals(singletonList("白"), result2);
} }
@Test @Test
@ -255,9 +276,13 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
testInitLocalCache(); testInitLocalCache();
// 准备参数 // 准备参数
String text = "你是傻瓜,你是笨蛋"; String text = "你是傻瓜,你是笨蛋";
// 调用,断言 // 调用,断言
assertFalse(sensitiveWordService.isTextValid(text, null)); assertFalse(sensitiveWordService.isTextValid(text, null));
// 准备参数
String text2 = "你是白";
// 调用,断言
assertFalse(sensitiveWordService.isTextValid(text2, null));
} }
@Test @Test
@ -265,9 +290,13 @@ public class SensitiveWordServiceImplTest extends BaseDbUnitTest {
testInitLocalCache(); testInitLocalCache();
// 准备参数 // 准备参数
String text = "你是傻瓜,你是笨蛋"; String text = "你是傻瓜,你是笨蛋";
// 调用,断言 // 调用,断言
assertFalse(sensitiveWordService.isTextValid(text, singletonList("论坛"))); assertFalse(sensitiveWordService.isTextValid(text, singletonList("论坛")));
// 准备参数
String text2 = "你是白";
// 调用,断言
assertFalse(sensitiveWordService.isTextValid(text2, singletonList("测试")));
} }
} }