Commit 4ff6e293 authored by 刘基明

文本查重

parent 809d85d7
......@@ -121,5 +121,12 @@ public class ThemeController {
return CommonResp.success();
}
/**
 * Endpoint that triggers initialization of the text duplicate-check data.
 * Delegates to {@code themeManager.initThemeTextCheck()} and answers with a
 * success response once that call returns normally.
 *
 * NOTE(review): mapped as GET although the delegate appears to write data —
 * confirm this is intended.
 *
 * @return an empty success response
 */
@ApiOperation("文本查重初始化")
@GetMapping(value = "/initTextCheck")
@ResponseBody
public CommonResp<Void> initTextCheck() {
themeManager.initThemeTextCheck();
return CommonResp.success();
}
}
......@@ -23,7 +23,7 @@ public class CodeAutoGenerator {
String mysqlPassword = "@imeng123";
String jdbcUrl = "jdbc:mysql://rm-uf6r22t3d798q4kmk.mysql.rds.aliyuncs.com:3306/tamp_community";
// String[] tables = new String[]{"theme"};
String[] tables = new String[]{"theme_text_filter"};
String[] tables = new String[]{"theme_check_duplicate"};
String basePackage = "com.tanpu.community";
String mapperPackage = "dao.mapper.community";
String entityPackage = "dao.entity.community";
......
package com.tanpu.community.dao.entity.community;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
......@@ -19,17 +21,18 @@ import java.time.LocalDateTime;
* @author xudong
* @since 2021-09-08
*/
@TableName("theme_text_filter")
@ApiModel(value="ThemeTextFilterEntity对象", description="主题文字查重表")
@Builder
@Data
@AllArgsConstructor
@NoArgsConstructor
public class ThemeTextFilterEntity implements Serializable {
@TableName("theme_check_duplicate")
@ApiModel(value="ThemeCheckDuplicateEntity对象", description="主题文字查重表")
public class ThemeCheckDuplicateEntity implements Serializable {
private static final long serialVersionUID = 1L;
@ApiModelProperty(value = "id")
@TableId(value = "id", type = IdType.AUTO)
private Long id;
@ApiModelProperty(value = "主题Id")
......@@ -38,7 +41,7 @@ public class ThemeTextFilterEntity implements Serializable {
private Integer themeType;
@ApiModelProperty(value = "分块")
private String partition;
private String partitionText;
@ApiModelProperty(value = "分块hash")
private String partitionHash;
......@@ -49,6 +52,9 @@ public class ThemeTextFilterEntity implements Serializable {
@ApiModelProperty(value = "分块总数")
private Integer totalParts;
@ApiModelProperty(value = "作者id")
private String userId;
private LocalDateTime createTime;
private LocalDateTime updateTime;
......@@ -80,12 +86,12 @@ public class ThemeTextFilterEntity implements Serializable {
this.themeType = themeType;
}
public String getPartition() {
return partition;
/** Returns the value of the {@code partitionText} field (the text of this partition). */
public String getPartitionText() {
return partitionText;
}
public void setPartition(String partition) {
this.partition = partition;
/** Sets the {@code partitionText} field (the text of this partition). */
public void setPartitionText(String partitionText) {
this.partitionText = partitionText;
}
public String getPartitionHash() {
......@@ -112,6 +118,14 @@ public class ThemeTextFilterEntity implements Serializable {
this.totalParts = totalParts;
}
/** Returns the value of the {@code userId} field (the author's id). */
public String getUserId() {
return userId;
}
/** Sets the {@code userId} field (the author's id). */
public void setUserId(String userId) {
this.userId = userId;
}
/** Returns the value of the {@code createTime} field. */
public LocalDateTime getCreateTime() {
return createTime;
}
......@@ -138,14 +152,15 @@ public class ThemeTextFilterEntity implements Serializable {
@Override
public String toString() {
return "ThemeTextFilterEntity{" +
return "ThemeCheckDuplicateEntity{" +
"id=" + id +
", themeId=" + themeId +
", themeType=" + themeType +
", partition=" + partition +
", partitionText=" + partitionText +
", partitionHash=" + partitionHash +
", partitionNum=" + partitionNum +
", totalParts=" + totalParts +
", userId=" + userId +
", createTime=" + createTime +
", updateTime=" + updateTime +
", deleteTag=" + deleteTag +
......
package com.tanpu.community.dao.mapper.community;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.baomidou.mybatisplus.core.toolkit.Constants;
import com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
/**
 * <p>
 * Mapper interface for the theme text duplicate-check table
 * ({@code theme_check_duplicate}).
 * </p>
 *
 * @author xudong
 * @since 2021-09-08
 */
public interface ThemeCheckDuplicateMapper extends BaseMapper<ThemeCheckDuplicateEntity> {

    /**
     * Flags every row of the given theme as deleted ({@code delete_tag = 1}).
     *
     * The id is bound with {@code #{id}} so it is sent as a prepared-statement
     * parameter; {@code ${id}} would splice the raw string into the SQL text,
     * which is both an injection risk and wrong for non-numeric ids.
     *
     * @param themeId id of the theme whose rows are flagged
     * @return the value MyBatis reports for the update (affected row count)
     */
    @Update("update theme_check_duplicate set delete_tag=1 where theme_id = #{id}")
    Long deleteByThemeId(@Param("id") String themeId);

    /**
     * Returns the largest per-theme row count among the rows selected by the
     * wrapper. The wrapper supplies the WHERE / GROUP BY segment and is
     * expected to group by {@code theme_id}.
     *
     * @param wrapper condition wrapper rendered through {@code ew.customSqlSegment}
     * @return the maximum count, or {@code null} when no row matches
     */
    @Select("select max(tmp.d) from (" +
            "select theme_id ,count(*) as d " +
            "from theme_check_duplicate ${ew.customSqlSegment}" +
            ") tmp")
    Integer check(@Param(Constants.WRAPPER) LambdaQueryWrapper<ThemeCheckDuplicateEntity> wrapper);
}
package com.tanpu.community.dao.mapper.community;
import com.tanpu.community.dao.entity.community.ThemeTextFilterEntity;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
/**
 * <p>
 * Mapper interface for the theme text duplicate-check table.
 * Declares no statements of its own; only what {@code BaseMapper} provides.
 * </p>
 *
 * @author xudong
 * @since 2021-09-08
 */
public interface ThemeTextFilterMapper extends BaseMapper<ThemeTextFilterEntity> {
}
......@@ -140,6 +140,9 @@ public class ThemeManager {
@Autowired
private NotificationService notificationService;
@Autowired
private ThemeTextCheckService themeTextCheckService;
@PostConstruct
public void init() throws IOException {
File f = new File(tmpDir);
......@@ -211,6 +214,11 @@ public class ThemeManager {
ThemeEntity themeEntity = new ThemeEntity();
BeanUtils.copyProperties(req, themeEntity);
themeEntity.setAuthorId(userId);
// 文本查重
if (themeTextCheckService.checkDuplicate(ConvertUtil.convert(themeEntity).getTextContent())) {
throw new BizException("圈子里已存在相似内容,请勿重复发布");
}
// 腾讯云敏感词校验
checkContent(req);
themeEntity.setContent(JsonUtil.toJson(req.getContent()));
......@@ -219,6 +227,7 @@ public class ThemeManager {
if (StringUtils.isBlank(req.getEditThemeId())) {
// 新建
themeService.insertTheme(themeEntity);
} else {
// 修改
themeService.update(themeEntity, req.getEditThemeId());
......@@ -233,12 +242,13 @@ public class ThemeManager {
}
themeAttachmentService.insertList(themeAttachments);
ESThemeQo esThemeQo = ConvertUtil.convert(themeEntity);
try {
esService.insertOrUpdateTheme(ConvertUtil.convert(themeEntity));
esService.insertOrUpdateTheme(esThemeQo);
} catch (Exception e) {
log.error("error in save theme to ES. themeId:{}, error:{}", themeEntity.getThemeId(), ExceptionUtils.getStackTrace(e));
}
themeTextCheckService.insert(esThemeQo.getTextContent(), themeEntity.getThemeId(), userId, themeEntity.getThemeType());
redisCache.evict(StringUtils.joinWith("_", CACHE_THEME_ID, themeEntity.getThemeId()));
......@@ -247,8 +257,8 @@ public class ThemeManager {
if (1 == req.getSyncToNewComm()) {
CommonResp response = synchronizeToNewsFeed(req, themeEntity.getThemeId(), userId);
if (response.isNotSuccess()) {
if ("8001".equals(response.getCode())) {
// 内容受限,不滚发布
if ("8001".equals(response.getCode()) || ErrorCodeConstant.THEME_SYNCHRONIZE_FAILED.getCode().equals(response.getCode())) {
// 内容受限,不滚发布
return CommonResp.error(ErrorCodeConstant.THEME_SYNCHRONIZE_FAILED.getCode(), "发布成功,同步失败:" + response.getMsg(), themeResp);
} else {
// 其他回滚异常
......@@ -283,11 +293,12 @@ public class ThemeManager {
imgList.forEach(img -> {
feedList.add(convertImg(img, userId));
});
} else if (RelTypeEnum.OFFLINE_ACTIVITY.type.equals(themeContentReq.getType())) {
// throw new BizException("线下活动暂时无法同步到专栏");
return CommonResp.error(ErrorCodeConstant.THEME_SYNCHRONIZE_FAILED.getCode(), "线下活动无法同步");
} else {
//其他类型的附件
if (RelTypeEnum.OFFLINE_ACTIVITY.type.equals(themeContentReq.getType())){
throw new BizException("线下活动暂时无法同步到专栏");
}
feedList.add(NewsFeedResReq.builder().relType(Integer.parseInt(themeContentReq.getType()))
.relId(themeContentReq.getValue())
.productType(themeContentReq.getProductType())
......@@ -750,7 +761,7 @@ public class ThemeManager {
/**
 * Logically deletes a theme (the original note says the user is validated —
 * presumably inside {@code themeService.deleteById}; confirm there).
 *
 * @param themeId id of the theme to delete
 * @param userId  id of the user requesting the deletion
 */
public void delete(String themeId, String userId) {
themeService.deleteById(themeId, userId);
// Flag the theme's duplicate-check rows as deleted as well.
themeTextCheckService.deleteByThemeId(themeId);
// Drop the cached copy of the theme.
this.evictThemeCache(themeId);
}
......@@ -939,4 +950,21 @@ public class ThemeManager {
redisCache.evict(StringUtils.joinWith("_", CACHE_THEME_ID, themeId));
}
/**
 * Rebuilds the duplicate-check rows for the themes returned by
 * {@code themeService.queryLatestThemes(30)}.
 *
 * For every TEXT content item of each theme, the text is handed to
 * {@code themeTextCheckService.insertInit} together with the theme id, the
 * author id and the theme's creation time.
 *
 * NOTE(review): {@code insertInit} first flags the theme's existing rows as
 * deleted, so a theme with several TEXT items keeps only the last one —
 * confirm themes carry at most one TEXT item.
 */
@Transactional
public void initThemeTextCheck() {
    List<ThemeEntity> themeEntities = themeService.queryLatestThemes(30);
    List<ThemeQo> themeQos = ConvertUtil.themeEntitiesToDTOs(themeEntities);
    for (ThemeQo themeQo : themeQos) {
        List<ThemeContentQo> content = themeQo.getContent();
        if (content == null) {
            // Theme without any content items: nothing to index.
            continue;
        }
        for (ThemeContentQo themeContentQo : content) {
            if (themeContentQo == null) {
                continue;
            }
            // Constant-first equals: a missing type must not abort the whole run.
            if (!RelTypeEnum.TEXT.type.equals(themeContentQo.getType())) {
                continue;
            }
            if (themeContentQo.getValue() == null) {
                // A TEXT item without a value has nothing to split or hash.
                continue;
            }
            themeTextCheckService.insertInit(themeContentQo.getValue(), themeQo.getThemeId(), themeQo.getAuthorId(), TimeUtils.getDateTimeOfTimestamp(themeQo.getCreateTime()));
        }
    }
}
}
package com.tanpu.community.service;
import com.tanpu.community.dao.entity.community.ThemeTextFilterEntity;
import com.tanpu.community.dao.mapper.community.ThemeTextFilterMapper;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.tanpu.common.constant.BizStatus;
import com.tanpu.common.exception.BizException;
import com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity;
import com.tanpu.community.dao.mapper.community.ThemeCheckDuplicateMapper;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
@Service
public class ThemeTextCheckService {
@Resource
private ThemeTextFilterMapper themeTextFilterMapper;
private ThemeCheckDuplicateMapper themeCheckDuplicateMapper;
public void insert(){
ThemeTextFilterEntity build = ThemeTextFilterEntity.builder().build();
themeTextFilterMapper.insert(build);
/**
 * Replaces the duplicate-check rows of one theme with rows built from
 * {@code content}.
 *
 * The theme's existing rows are flagged as deleted first. The text is then
 * split on the characters in {@code [,。!? ]} and one row is written per
 * segment, carrying the segment text, its MD5 hash, its 1-based position and
 * the total number of segments.
 *
 * @param content   text of the theme; when {@code null} the old rows are
 *                  still flagged as deleted and nothing is inserted
 * @param themeId   id of the theme the rows belong to
 * @param userId    id of the theme's author
 * @param themeType type of the theme, stored on every row
 */
public void insert(String content, String themeId, String userId,Integer themeType) {
    // Retire the rows of any earlier version of this theme before re-indexing.
    themeCheckDuplicateMapper.deleteByThemeId(themeId);
    if (content == null) {
        return;
    }
    String[] parts = content.replaceAll("[,。!? ]", "。").split("。");
    for (int i = 0; i < parts.length; i++) {
        ThemeCheckDuplicateEntity row = ThemeCheckDuplicateEntity.builder()
                .themeId(themeId)
                .themeType(themeType)
                .partitionText(parts[i])
                .partitionHash(getMD5(parts[i]))
                .partitionNum(i + 1)
                .totalParts(parts.length)
                .userId(userId)
                .build();
        themeCheckDuplicateMapper.insert(row);
    }
}
/**
 * Flags all duplicate-check rows of a theme as deleted by delegating to
 * {@code themeCheckDuplicateMapper.deleteByThemeId}.
 *
 * @param themeId id of the theme whose rows are flagged
 */
public void deleteByThemeId(String themeId){
themeCheckDuplicateMapper.deleteByThemeId(themeId);
}
/**
 * Writes a single duplicate-check row.
 *
 * @param themeId       id of the theme the segment belongs to
 * @param partition     text of the segment
 * @param partitionHash hash of the segment text
 * @param num           position of the segment within the theme
 * @param total         total number of segments of the theme
 * @param userId        id of the theme's author
 */
public void insert(String themeId, String partition, String partitionHash, Integer num, Integer total,String userId) {
    // Build the row and hand it straight to the mapper.
    themeCheckDuplicateMapper.insert(
            ThemeCheckDuplicateEntity.builder()
                    .userId(userId)
                    .themeId(themeId)
                    .partitionNum(num)
                    .totalParts(total)
                    .partitionHash(partitionHash)
                    .partitionText(partition)
                    .build());
}
/**
 * Tells whether {@code content} is largely identical to an already stored theme.
 *
 * The text is split on the characters in {@code [,。!? ]}, every non-empty
 * segment is hashed, and the stored rows that are not flagged as deleted are
 * searched for those hashes. The content counts as a duplicate when, for some
 * theme, the number of matching rows reaches 80% of the number of hashed
 * segments.
 *
 * Empty segments are ignored: they hash to the same value in every text and
 * would otherwise make unrelated texts look similar.
 *
 * NOTE(review): the query does not exclude any theme, so re-submitting an
 * edited theme can match the rows of its own earlier version — confirm with
 * the caller.
 *
 * @param content text to check; {@code null} or text without any non-empty
 *                segment is never a duplicate
 * @return {@code true} when a sufficiently similar theme exists
 */
public boolean checkDuplicate(String content) {
    if (content == null) {
        return false;
    }
    String[] split = content.replaceAll("[,。!? ]", "。").split("。");
    List<String> hashes = new ArrayList<>(split.length);
    for (String part : split) {
        if (!part.isEmpty()) {
            hashes.add(getMD5(part));
        }
    }
    if (hashes.isEmpty()) {
        // Nothing to compare, and an empty IN (...) list must not reach the database.
        return false;
    }
    LambdaQueryWrapper<ThemeCheckDuplicateEntity> w = new LambdaQueryWrapper<ThemeCheckDuplicateEntity>()
            .in(ThemeCheckDuplicateEntity::getPartitionHash, hashes)
            .eq(ThemeCheckDuplicateEntity::getDeleteTag, BizStatus.DeleteTag.tag_init)
            .groupBy(ThemeCheckDuplicateEntity::getThemeId);
    // Highest number of matching rows found for a single theme; null when nothing matched.
    Integer check = themeCheckDuplicateMapper.check(w);
    // Integer form of check / segments >= 0.8.
    return check != null && check * 10 >= hashes.size() * 8;
}
/**
 * Returns the MD5 digest of {@code str} as a lowercase hexadecimal string.
 *
 * The text is encoded as UTF-8 before hashing so the result does not depend
 * on the platform's default charset.
 *
 * The 16-byte digest is rendered through {@link BigInteger}, so leading zero
 * digits are dropped and the result can be shorter than 32 characters. This
 * is kept as is because the hashes are only compared with each other and
 * padding would stop new hashes from matching rows already stored.
 *
 * @param str text to hash
 * @return hexadecimal form of the digest, without leading zeros
 * @throws BizException when the digest cannot be computed (including a
 *                      {@code null} input)
 */
public static String getMD5(String str) {
    try {
        MessageDigest md = MessageDigest.getInstance("MD5");
        byte[] digest = md.digest(str.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        // Signum 1 keeps the value non-negative so the hex string has no minus sign.
        return new BigInteger(1, digest).toString(16);
    } catch (Exception e) {
        // Broad catch kept on purpose: callers rely on BizException for every failure here.
        throw new BizException("MD5加密出现错误");
    }
}
/**
 * Rebuilds the duplicate-check rows of an existing theme, keeping the theme's
 * original timestamp.
 *
 * The theme's existing rows are flagged as deleted first. The text is then
 * split on the characters in {@code [,。!? ]} and one row is written per
 * segment with {@code createTime} used for both the create and the update time.
 *
 * @param content    text of the theme; when {@code null} the old rows are
 *                   still flagged as deleted and nothing is inserted
 * @param themeId    id of the theme the rows belong to
 * @param userId     id of the theme's author
 * @param createTime timestamp stored as create and update time of every row
 */
public void insertInit(String content, String themeId,String userId, LocalDateTime createTime) {
    themeCheckDuplicateMapper.deleteByThemeId(themeId);
    if (content == null) {
        return;
    }
    String[] parts = content.replaceAll("[,。!? ]", "。").split("。");
    for (int i = 0; i < parts.length; i++) {
        ThemeCheckDuplicateEntity row = ThemeCheckDuplicateEntity.builder()
                .themeId(themeId)
                .partitionText(parts[i])
                .partitionHash(getMD5(parts[i]))
                .partitionNum(i + 1)
                .totalParts(parts.length)
                .userId(userId)
                .createTime(createTime)
                .updateTime(createTime)
                .build();
        themeCheckDuplicateMapper.insert(row);
    }
}
}
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.tanpu.community.dao.mapper.community.ThemeTextFilterMapper">
<mapper namespace="com.tanpu.community.dao.mapper.community.ThemeCheckDuplicateMapper">
<!-- 通用查询映射结果 -->
<resultMap id="BaseResultMap" type="com.tanpu.community.dao.entity.community.ThemeTextFilterEntity">
<resultMap id="BaseResultMap" type="com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity">
<id column="id" property="id" />
<result column="theme_id" property="themeId" />
<result column="theme_type" property="themeType" />
<result column="partition" property="partition" />
<result column="partition_text" property="partitionText" />
<result column="partition_hash" property="partitionHash" />
<result column="partition_num" property="partitionNum" />
<result column="total_parts" property="totalParts" />
<result column="user_id" property="userId" />
<result column="create_time" property="createTime" />
<result column="update_time" property="updateTime" />
<result column="delete_tag" property="deleteTag" />
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment