Commit 89d143f8 authored by 刘基明

查重

parent 195d2a44
......@@ -5,6 +5,7 @@ import com.tanpu.common.constant.BizStatus;
import com.tanpu.common.exception.BizException;
import com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity;
import com.tanpu.community.dao.mapper.community.ThemeCheckDuplicateMapper;
import com.tanpu.community.util.TimeUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
......@@ -24,22 +25,22 @@ public class ThemeTextCheckService {
private final String regex = "[,。!?;;:, ]";
// Insert: rebuilds the stored duplicate-check segments for one theme.
// Steps: delete whatever the mapper currently holds for themeId, replace every
// character matched by `regex` with "。", split the content on "。", then store
// each trimmed segment together with its getMD5 hash, its 1-based position and
// the total number of segments.
// themeType is accepted but not used anywhere in this method body.
public void insert(String content, String themeId, String userId,Integer themeType) {
public void insert(String content, String themeId, String userId, Integer themeType) {
// Remove earlier entries for this theme before re-inserting.
themeCheckDuplicateMapper.deleteByThemeId(themeId);
// Normalize all separators to a single delimiter so one split is enough.
content = content.replaceAll(regex, "。");
String[] split = content.split("。");
for (int i = 0; i < split.length; i++) {
String trim = StringUtils.trim(split[i]);
// The hash is computed from the trimmed segment; positions are 1-based.
insert(themeId, trim, getMD5(trim), i + 1, split.length,userId);
insert(themeId, trim, getMD5(trim), i + 1, split.length, userId);
}
}
// Delete: delegates to themeCheckDuplicateMapper.deleteByThemeId for the given theme id.
public void deleteByThemeId(String themeId){
public void deleteByThemeId(String themeId) {
themeCheckDuplicateMapper.deleteByThemeId(themeId);
}
private void insert(String themeId, String partition, String partitionHash, Integer num, Integer total,String userId) {
private void insert(String themeId, String partition, String partitionHash, Integer num, Integer total, String userId) {
ThemeCheckDuplicateEntity build = ThemeCheckDuplicateEntity.builder()
.themeId(themeId)
......@@ -53,7 +54,8 @@ public class ThemeTextCheckService {
}
public boolean checkDuplicate(String content) {
if (content.length()<15){
// Skip the duplicate check for content shorter than 50 characters
if (content.length() < 50) {
return false;
}
content = content.replaceAll(regex, "。");
......@@ -63,11 +65,14 @@ public class ThemeTextCheckService {
String trim = StringUtils.trim(split[i]);
list.add(getMD5(trim));
}
// Only match records created within the last 360 days (roughly one year)
LambdaQueryWrapper<ThemeCheckDuplicateEntity> w = new LambdaQueryWrapper<ThemeCheckDuplicateEntity>().in(ThemeCheckDuplicateEntity::getPartitionHash, list)
.eq(ThemeCheckDuplicateEntity::getDeleteTag, BizStatus.DeleteTag.tag_init)
.gt(ThemeCheckDuplicateEntity::getCreateTime, TimeUtils.getDaysBefore(360))
.groupBy(ThemeCheckDuplicateEntity::getThemeId);
Integer check = themeCheckDuplicateMapper.check(w);
if (check!=null && check*10>=split.length*8){
// Treat as a duplicate when the count returned by the mapper is at least 80% of the segment count
if (check != null && check * 10 >= split.length * 8) {
return true;
}
return false;
......@@ -89,7 +94,7 @@ public class ThemeTextCheckService {
}
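// Initialization: stores segments with a caller-supplied createTime
// NOTE(review): the hash below is computed from split[i] untrimmed, while insert and checkDuplicate hash the trimmed segment — confirm this is intended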
public void insertInit(String content, String themeId,String userId, LocalDateTime createTime) {
public void insertInit(String content, String themeId, String userId, LocalDateTime createTime) {
themeCheckDuplicateMapper.deleteByThemeId(themeId);
content = content.replaceAll(regex, "。");
......@@ -101,7 +106,7 @@ public class ThemeTextCheckService {
.themeId(themeId)
.partitionText(trim)
.partitionHash(getMD5(split[i]))
.partitionNum(i+1)
.partitionNum(i + 1)
.totalParts(split.length)
.userId(userId)
.createTime(createTime)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment