ThemeTextCheckService.java 5.43 KB
Newer Older
刘基明's avatar
刘基明 committed
1 2
package com.tanpu.community.service;

刘基明's avatar
刘基明 committed
3 4 5 6
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.tanpu.common.constant.BizStatus;
import com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity;
import com.tanpu.community.dao.mapper.community.ThemeCheckDuplicateMapper;
import com.tanpu.community.util.TimeUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
刘基明's avatar
刘基明 committed
16

刘基明's avatar
刘基明 committed
17
@Service
刘基明's avatar
刘基明 committed
18
@Slf4j
刘基明's avatar
刘基明 committed
19 20
public class ThemeTextCheckService {

刘基明's avatar
刘基明 committed
21
    public static final int SENTENCE_MIN_LENGTH = 5;
刘基明's avatar
刘基明 committed
22
    @Resource
刘基明's avatar
刘基明 committed
23
    private ThemeCheckDuplicateMapper themeCheckDuplicateMapper;
刘基明's avatar
刘基明 committed
24

刘基明's avatar
刘基明 committed
25
    private final String regex = "[,。!?;;:, ]";
刘基明's avatar
刘基明 committed
26

刘基明's avatar
刘基明 committed
27
    // 插入
刘基明's avatar
刘基明 committed
28
    public void insert(String content, String themeId, String userId, Integer themeType, String editThemeId) {
刘基明's avatar
刘基明 committed
29 30 31 32 33 34
        if (StringUtils.isBlank(content) || content.length() < 50) {
            return;
        }

        try {
            if (StringUtils.isNotBlank(editThemeId)) {
刘基明's avatar
刘基明 committed
35 36 37 38 39 40
                themeCheckDuplicateMapper.deleteByThemeId(editThemeId);
            }
            content = content.replaceAll(regex, "。");
            String[] split = content.split("。");
            for (int i = 0; i < split.length; i++) {
                String trim = StringUtils.trim(split[i]);
刘基明's avatar
刘基明 committed
41 42 43
                if (trim.length() > SENTENCE_MIN_LENGTH) {
                    insert(themeId, trim, getHash(trim), i + 1, split.length, userId, themeType);
                }
刘基明's avatar
刘基明 committed
44
            }
刘基明's avatar
刘基明 committed
45 46
        } catch (Exception e) {
            log.error("文本查重insert失败,themeId:" + themeId);
刘基明's avatar
刘基明 committed
47
        }
刘基明's avatar
刘基明 committed
48

刘基明's avatar
刘基明 committed
49 50
    }

刘基明's avatar
刘基明 committed
51
    // 删除
刘基明's avatar
刘基明 committed
52
    public void deleteByThemeId(String themeId) {
刘基明's avatar
刘基明 committed
53 54 55
        themeCheckDuplicateMapper.deleteByThemeId(themeId);
    }

刘基明's avatar
刘基明 committed
56
    private void insert(String themeId, String partition, Integer partitionHash, Integer num, Integer total, String userId, Integer themeType) {
刘基明's avatar
刘基明 committed
57 58 59 60 61 62 63 64

        ThemeCheckDuplicateEntity build = ThemeCheckDuplicateEntity.builder()
                .themeId(themeId)
                .partitionText(partition)
                .partitionHash(partitionHash)
                .partitionNum(num)
                .totalParts(total)
                .userId(userId)
刘基明's avatar
刘基明 committed
65
                .themeType(themeType)
刘基明's avatar
刘基明 committed
66 67 68 69 70
                .build();
        themeCheckDuplicateMapper.insert(build);
    }

    public boolean checkDuplicate(String content) {
刘基明's avatar
刘基明 committed
71 72
        // 文字数小于50不查重
        if (content.length() < 50) {
刘基明's avatar
刘基明 committed
73 74
            return false;
        }
刘基明's avatar
刘基明 committed
75
        content = content.replaceAll(regex, "。");
刘基明's avatar
刘基明 committed
76
        String[] split = content.split("。");
刘基明's avatar
刘基明 committed
77
        List<Integer> list = new ArrayList<>();
刘基明's avatar
刘基明 committed
78
        for (int i = 0; i < split.length; i++) {
刘基明's avatar
刘基明 committed
79
            String trim = StringUtils.trim(split[i]);
刘基明's avatar
刘基明 committed
80 81 82
            if (trim.length() > SENTENCE_MIN_LENGTH) {
                list.add(getHash(trim));
            }
刘基明's avatar
刘基明 committed
83
        }
刘基明's avatar
刘基明 committed
84 85 86 87 88

        if (list.size() <= 2) {
            return false;
        }

刘基明's avatar
刘基明 committed
89
        // 一年以内
刘基明's avatar
刘基明 committed
90 91
        LambdaQueryWrapper<ThemeCheckDuplicateEntity> w = new LambdaQueryWrapper<ThemeCheckDuplicateEntity>().in(ThemeCheckDuplicateEntity::getPartitionHash, list)
                .eq(ThemeCheckDuplicateEntity::getDeleteTag, BizStatus.DeleteTag.tag_init)
刘基明's avatar
刘基明 committed
92
                .gt(ThemeCheckDuplicateEntity::getCreateTime, TimeUtils.getDaysBefore(360))
刘基明's avatar
刘基明 committed
93 94
                .groupBy(ThemeCheckDuplicateEntity::getThemeId);
        Integer check = themeCheckDuplicateMapper.check(w);
刘基明's avatar
刘基明 committed
95
        // 重复率大于80%
刘基明's avatar
刘基明 committed
96
        if (check != null && check >= list.size() * 0.8) {
刘基明's avatar
刘基明 committed
97 98 99 100 101 102
            return true;
        }
        return false;
    }


刘基明's avatar
刘基明 committed
103 104 105 106 107 108 109 110 111 112 113 114 115
    public static Integer getHash(String str) {
        return str.hashCode();
        // try {
        //     // 生成一个MD5加密计算摘要
        //     MessageDigest md = MessageDigest.getInstance("MD5");
        //     // 计算md5函数
        //     md.update(str.getBytes());
        //     // digest()最后确定返回md5 hash值,返回值为8为字符串。因为md5 hash值是16位的hex值,实际上就是8位的字符
        //     // BigInteger函数则将8位的字符串转换成16位hex值,用字符串来表示;得到字符串形式的hash值
        //     return new BigInteger(1, md.digest()).toString(16);
        // } catch (Exception e) {
        //     throw new BizException("MD5加密出现错误");
        // }
刘基明's avatar
刘基明 committed
116 117
    }

刘基明's avatar
刘基明 committed
118
    // 初始化
刘基明's avatar
刘基明 committed
119
    public void insertInit(String content, String themeId, String userId, LocalDateTime createTime, Integer themeType) {
刘基明's avatar
刘基明 committed
120
        themeCheckDuplicateMapper.deleteByThemeId(themeId);
刘基明's avatar
刘基明 committed
121 122

        content = content.replaceAll(regex, "。");
刘基明's avatar
刘基明 committed
123 124
        String[] split = content.split("。");
        for (int i = 0; i < split.length; i++) {
刘基明's avatar
刘基明 committed
125
            String trim = StringUtils.trim(split[i]);
刘基明's avatar
刘基明 committed
126 127 128 129 130 131 132 133 134 135 136 137 138 139
            if (trim.length() > SENTENCE_MIN_LENGTH) {
                ThemeCheckDuplicateEntity build = ThemeCheckDuplicateEntity.builder()
                        .themeId(themeId)
                        .partitionText(trim)
                        .partitionHash(getHash(split[i]))
                        .partitionNum(i + 1)
                        .totalParts(split.length)
                        .userId(userId)
                        .themeType(themeType)
                        .createTime(createTime)
                        .updateTime(createTime)
                        .build();
                themeCheckDuplicateMapper.insert(build);
            }
刘基明's avatar
刘基明 committed
140 141

        }
刘基明's avatar
刘基明 committed
142 143


刘基明's avatar
刘基明 committed
144 145 146
    }

}