Commit 51f558a1 authored by 刘基明

查重bug fix

parent 31064242
......@@ -18,6 +18,7 @@ import java.util.List;
@Slf4j
public class ThemeTextCheckService {
public static final int SENTENCE_MIN_LENGTH = 5;
@Resource
private ThemeCheckDuplicateMapper themeCheckDuplicateMapper;
......@@ -37,8 +38,9 @@ public class ThemeTextCheckService {
String[] split = content.split("。");
for (int i = 0; i < split.length; i++) {
String trim = StringUtils.trim(split[i]);
if (trim.length() > SENTENCE_MIN_LENGTH) {
insert(themeId, trim, getHash(trim), i + 1, split.length, userId, themeType);
}
}
} catch (Exception e) {
log.error("文本查重insert失败,themeId:" + themeId);
......@@ -75,8 +77,15 @@ public class ThemeTextCheckService {
List<Integer> list = new ArrayList<>();
for (int i = 0; i < split.length; i++) {
String trim = StringUtils.trim(split[i]);
if (trim.length() > SENTENCE_MIN_LENGTH) {
list.add(getHash(trim));
}
}
if (list.size() <= 2) {
return false;
}
// 一年以内
LambdaQueryWrapper<ThemeCheckDuplicateEntity> w = new LambdaQueryWrapper<ThemeCheckDuplicateEntity>().in(ThemeCheckDuplicateEntity::getPartitionHash, list)
.eq(ThemeCheckDuplicateEntity::getDeleteTag, BizStatus.DeleteTag.tag_init)
......@@ -84,7 +93,7 @@ public class ThemeTextCheckService {
.groupBy(ThemeCheckDuplicateEntity::getThemeId);
Integer check = themeCheckDuplicateMapper.check(w);
// 重复率大于80%
if (check != null && check * 10 >= split.length * 8) {
if (check != null && check >= list.size() * 0.8) {
return true;
}
return false;
......@@ -114,7 +123,7 @@ public class ThemeTextCheckService {
String[] split = content.split("。");
for (int i = 0; i < split.length; i++) {
String trim = StringUtils.trim(split[i]);
if (trim.length() > SENTENCE_MIN_LENGTH) {
ThemeCheckDuplicateEntity build = ThemeCheckDuplicateEntity.builder()
.themeId(themeId)
.partitionText(trim)
......@@ -129,6 +138,8 @@ public class ThemeTextCheckService {
themeCheckDuplicateMapper.insert(build);
}
}
}
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment