Commit 89d143f8 authored by 刘基明

查重

parent 195d2a44
...@@ -5,6 +5,7 @@ import com.tanpu.common.constant.BizStatus; ...@@ -5,6 +5,7 @@ import com.tanpu.common.constant.BizStatus;
import com.tanpu.common.exception.BizException; import com.tanpu.common.exception.BizException;
import com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity; import com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity;
import com.tanpu.community.dao.mapper.community.ThemeCheckDuplicateMapper; import com.tanpu.community.dao.mapper.community.ThemeCheckDuplicateMapper;
import com.tanpu.community.util.TimeUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
...@@ -24,22 +25,22 @@ public class ThemeTextCheckService { ...@@ -24,22 +25,22 @@ public class ThemeTextCheckService {
private final String regex = "[,。!?;;:, ]"; private final String regex = "[,。!?;;:, ]";
// 插入 // 插入
public void insert(String content, String themeId, String userId,Integer themeType) { public void insert(String content, String themeId, String userId, Integer themeType) {
themeCheckDuplicateMapper.deleteByThemeId(themeId); themeCheckDuplicateMapper.deleteByThemeId(themeId);
content = content.replaceAll(regex, "。"); content = content.replaceAll(regex, "。");
String[] split = content.split("。"); String[] split = content.split("。");
for (int i = 0; i < split.length; i++) { for (int i = 0; i < split.length; i++) {
String trim = StringUtils.trim(split[i]); String trim = StringUtils.trim(split[i]);
insert(themeId, trim, getMD5(trim), i + 1, split.length,userId); insert(themeId, trim, getMD5(trim), i + 1, split.length, userId);
} }
} }
// 删除 // 删除
public void deleteByThemeId(String themeId){ public void deleteByThemeId(String themeId) {
themeCheckDuplicateMapper.deleteByThemeId(themeId); themeCheckDuplicateMapper.deleteByThemeId(themeId);
} }
private void insert(String themeId, String partition, String partitionHash, Integer num, Integer total,String userId) { private void insert(String themeId, String partition, String partitionHash, Integer num, Integer total, String userId) {
ThemeCheckDuplicateEntity build = ThemeCheckDuplicateEntity.builder() ThemeCheckDuplicateEntity build = ThemeCheckDuplicateEntity.builder()
.themeId(themeId) .themeId(themeId)
...@@ -53,7 +54,8 @@ public class ThemeTextCheckService { ...@@ -53,7 +54,8 @@ public class ThemeTextCheckService {
} }
public boolean checkDuplicate(String content) { public boolean checkDuplicate(String content) {
if (content.length()<15){ // 文字数小于50不查重
if (content.length() < 50) {
return false; return false;
} }
content = content.replaceAll(regex, "。"); content = content.replaceAll(regex, "。");
...@@ -63,11 +65,14 @@ public class ThemeTextCheckService { ...@@ -63,11 +65,14 @@ public class ThemeTextCheckService {
String trim = StringUtils.trim(split[i]); String trim = StringUtils.trim(split[i]);
list.add(getMD5(trim)); list.add(getMD5(trim));
} }
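// Roughly one year: only rows whose createTime is after TimeUtils.getDaysBefore(360) are compared (360 days, not 365)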
LambdaQueryWrapper<ThemeCheckDuplicateEntity> w = new LambdaQueryWrapper<ThemeCheckDuplicateEntity>().in(ThemeCheckDuplicateEntity::getPartitionHash, list) LambdaQueryWrapper<ThemeCheckDuplicateEntity> w = new LambdaQueryWrapper<ThemeCheckDuplicateEntity>().in(ThemeCheckDuplicateEntity::getPartitionHash, list)
.eq(ThemeCheckDuplicateEntity::getDeleteTag, BizStatus.DeleteTag.tag_init) .eq(ThemeCheckDuplicateEntity::getDeleteTag, BizStatus.DeleteTag.tag_init)
.gt(ThemeCheckDuplicateEntity::getCreateTime, TimeUtils.getDaysBefore(360))
.groupBy(ThemeCheckDuplicateEntity::getThemeId); .groupBy(ThemeCheckDuplicateEntity::getThemeId);
Integer check = themeCheckDuplicateMapper.check(w); Integer check = themeCheckDuplicateMapper.check(w);
if (check!=null && check*10>=split.length*8){ // 重复率大于80%
if (check != null && check * 10 >= split.length * 8) {
return true; return true;
} }
return false; return false;
...@@ -89,7 +94,7 @@ public class ThemeTextCheckService { ...@@ -89,7 +94,7 @@ public class ThemeTextCheckService {
} }
// 初始化 // 初始化
public void insertInit(String content, String themeId,String userId, LocalDateTime createTime) { public void insertInit(String content, String themeId, String userId, LocalDateTime createTime) {
themeCheckDuplicateMapper.deleteByThemeId(themeId); themeCheckDuplicateMapper.deleteByThemeId(themeId);
content = content.replaceAll(regex, "。"); content = content.replaceAll(regex, "。");
...@@ -101,7 +106,7 @@ public class ThemeTextCheckService { ...@@ -101,7 +106,7 @@ public class ThemeTextCheckService {
.themeId(themeId) .themeId(themeId)
.partitionText(trim) .partitionText(trim)
.partitionHash(getMD5(split[i])) .partitionHash(getMD5(split[i]))
.partitionNum(i+1) .partitionNum(i + 1)
.totalParts(split.length) .totalParts(split.length)
.userId(userId) .userId(userId)
.createTime(createTime) .createTime(createTime)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment