Commit 4ff6e293 authored by 刘基明's avatar 刘基明

文本查重

parent 809d85d7
...@@ -121,5 +121,12 @@ public class ThemeController { ...@@ -121,5 +121,12 @@ public class ThemeController {
return CommonResp.success(); return CommonResp.success();
} }
/**
 * Endpoint that starts the text duplicate-check initialization by delegating
 * to {@code themeManager.initThemeTextCheck()}.
 *
 * NOTE(review): mapped as GET although the delegated call appears to write
 * duplicate-check rows — confirm that a GET mapping is intended.
 *
 * @return the value of {@code CommonResp.success()}
 */
@ApiOperation("文本查重初始化")
@GetMapping(value = "/initTextCheck")
@ResponseBody
public CommonResp<Void> initTextCheck() {
themeManager.initThemeTextCheck();
return CommonResp.success();
}
} }
...@@ -23,7 +23,7 @@ public class CodeAutoGenerator { ...@@ -23,7 +23,7 @@ public class CodeAutoGenerator {
String mysqlPassword = "@imeng123"; String mysqlPassword = "@imeng123";
String jdbcUrl = "jdbc:mysql://rm-uf6r22t3d798q4kmk.mysql.rds.aliyuncs.com:3306/tamp_community"; String jdbcUrl = "jdbc:mysql://rm-uf6r22t3d798q4kmk.mysql.rds.aliyuncs.com:3306/tamp_community";
// String[] tables = new String[]{"theme"}; // String[] tables = new String[]{"theme"};
String[] tables = new String[]{"theme_text_filter"}; String[] tables = new String[]{"theme_check_duplicate"};
String basePackage = "com.tanpu.community"; String basePackage = "com.tanpu.community";
String mapperPackage = "dao.mapper.community"; String mapperPackage = "dao.mapper.community";
String entityPackage = "dao.entity.community"; String entityPackage = "dao.entity.community";
......
package com.tanpu.community.dao.entity.community; package com.tanpu.community.dao.entity.community;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName; import com.baomidou.mybatisplus.annotation.TableName;
import io.swagger.annotations.ApiModel; import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty; import io.swagger.annotations.ApiModelProperty;
...@@ -19,17 +21,18 @@ import java.time.LocalDateTime; ...@@ -19,17 +21,18 @@ import java.time.LocalDateTime;
* @author xudong * @author xudong
* @since 2021-09-08 * @since 2021-09-08
*/ */
@TableName("theme_text_filter")
@ApiModel(value="ThemeTextFilterEntity对象", description="主题文字查重表")
@Builder @Builder
@Data @Data
@AllArgsConstructor @AllArgsConstructor
@NoArgsConstructor @NoArgsConstructor
public class ThemeTextFilterEntity implements Serializable { @TableName("theme_check_duplicate")
@ApiModel(value="ThemeCheckDuplicateEntity对象", description="主题文字查重表")
public class ThemeCheckDuplicateEntity implements Serializable {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
@ApiModelProperty(value = "id") @ApiModelProperty(value = "id")
@TableId(value = "id", type = IdType.AUTO)
private Long id; private Long id;
@ApiModelProperty(value = "主题Id") @ApiModelProperty(value = "主题Id")
...@@ -38,7 +41,7 @@ public class ThemeTextFilterEntity implements Serializable { ...@@ -38,7 +41,7 @@ public class ThemeTextFilterEntity implements Serializable {
private Integer themeType; private Integer themeType;
@ApiModelProperty(value = "分块") @ApiModelProperty(value = "分块")
private String partition; private String partitionText;
@ApiModelProperty(value = "分块hash") @ApiModelProperty(value = "分块hash")
private String partitionHash; private String partitionHash;
...@@ -49,6 +52,9 @@ public class ThemeTextFilterEntity implements Serializable { ...@@ -49,6 +52,9 @@ public class ThemeTextFilterEntity implements Serializable {
@ApiModelProperty(value = "分块总数") @ApiModelProperty(value = "分块总数")
private Integer totalParts; private Integer totalParts;
@ApiModelProperty(value = "作者id")
private String userId;
private LocalDateTime createTime; private LocalDateTime createTime;
private LocalDateTime updateTime; private LocalDateTime updateTime;
...@@ -80,12 +86,12 @@ public class ThemeTextFilterEntity implements Serializable { ...@@ -80,12 +86,12 @@ public class ThemeTextFilterEntity implements Serializable {
this.themeType = themeType; this.themeType = themeType;
} }
public String getPartition() { public String getPartitionText() {
return partition; return partitionText;
} }
public void setPartition(String partition) { public void setPartitionText(String partitionText) {
this.partition = partition; this.partitionText = partitionText;
} }
public String getPartitionHash() { public String getPartitionHash() {
...@@ -112,6 +118,14 @@ public class ThemeTextFilterEntity implements Serializable { ...@@ -112,6 +118,14 @@ public class ThemeTextFilterEntity implements Serializable {
this.totalParts = totalParts; this.totalParts = totalParts;
} }
public String getUserId() {
return userId;
}
public void setUserId(String userId) {
this.userId = userId;
}
public LocalDateTime getCreateTime() { public LocalDateTime getCreateTime() {
return createTime; return createTime;
} }
...@@ -138,14 +152,15 @@ public class ThemeTextFilterEntity implements Serializable { ...@@ -138,14 +152,15 @@ public class ThemeTextFilterEntity implements Serializable {
@Override @Override
public String toString() { public String toString() {
return "ThemeTextFilterEntity{" + return "ThemeCheckDuplicateEntity{" +
"id=" + id + "id=" + id +
", themeId=" + themeId + ", themeId=" + themeId +
", themeType=" + themeType + ", themeType=" + themeType +
", partition=" + partition + ", partitionText=" + partitionText +
", partitionHash=" + partitionHash + ", partitionHash=" + partitionHash +
", partitionNum=" + partitionNum + ", partitionNum=" + partitionNum +
", totalParts=" + totalParts + ", totalParts=" + totalParts +
", userId=" + userId +
", createTime=" + createTime + ", createTime=" + createTime +
", updateTime=" + updateTime + ", updateTime=" + updateTime +
", deleteTag=" + deleteTag + ", deleteTag=" + deleteTag +
......
package com.tanpu.community.dao.mapper.community;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.baomidou.mybatisplus.core.toolkit.Constants;
import com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
/**
 * Mapper interface for the {@code theme_check_duplicate} table (theme text
 * duplicate check).
 *
 * @author xudong
 * @since 2021-09-08
 */
public interface ThemeCheckDuplicateMapper extends BaseMapper<ThemeCheckDuplicateEntity> {

    /**
     * Marks every row of the given theme as deleted by setting {@code delete_tag=1}.
     *
     * <p>The id is bound with {@code #{id}} so it is sent as a statement
     * parameter. The previous {@code ${id}} form spliced the raw string into
     * the SQL text, which allowed SQL injection and produced invalid SQL for
     * non-numeric ids.
     *
     * @param themeId value matched against the {@code theme_id} column
     * @return the value MyBatis reports for the update (affected row count)
     */
    @Update("update theme_check_duplicate set delete_tag=1 where theme_id = #{id}")
    Long deleteByThemeId(@Param("id") String themeId);

    /**
     * Counts the rows selected by {@code wrapper} per group and returns the
     * largest count.
     *
     * <p>The wrapper's SQL segment (WHERE / GROUP BY) is appended to the inner
     * query through {@code ${ew.customSqlSegment}}; the outer query takes
     * {@code max} over the per-group {@code count(*)}.
     *
     * @param wrapper conditions and grouping applied to {@code theme_check_duplicate}
     * @return the largest per-group row count, or {@code null} when the inner
     *         query yields no rows
     */
    @Select("select max(tmp.d) from (" +
            "select theme_id ,count(*) as d " +
            "from theme_check_duplicate ${ew.customSqlSegment}" +
            ") tmp")
    Integer check(@Param(Constants.WRAPPER) LambdaQueryWrapper<ThemeCheckDuplicateEntity> wrapper);
}
package com.tanpu.community.dao.mapper.community;
import com.tanpu.community.dao.entity.community.ThemeTextFilterEntity;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
/**
 * <p>
 * Mapper interface for the theme text duplicate-check table.
 * </p>
 *
 * <p>
 * Declares no methods of its own; everything available comes from
 * {@code BaseMapper<ThemeTextFilterEntity>}.
 * </p>
 *
 * @author xudong
 * @since 2021-09-08
 */
public interface ThemeTextFilterMapper extends BaseMapper<ThemeTextFilterEntity> {
}
...@@ -140,6 +140,9 @@ public class ThemeManager { ...@@ -140,6 +140,9 @@ public class ThemeManager {
@Autowired @Autowired
private NotificationService notificationService; private NotificationService notificationService;
@Autowired
private ThemeTextCheckService themeTextCheckService;
@PostConstruct @PostConstruct
public void init() throws IOException { public void init() throws IOException {
File f = new File(tmpDir); File f = new File(tmpDir);
...@@ -211,6 +214,11 @@ public class ThemeManager { ...@@ -211,6 +214,11 @@ public class ThemeManager {
ThemeEntity themeEntity = new ThemeEntity(); ThemeEntity themeEntity = new ThemeEntity();
BeanUtils.copyProperties(req, themeEntity); BeanUtils.copyProperties(req, themeEntity);
themeEntity.setAuthorId(userId); themeEntity.setAuthorId(userId);
// 文本查重
if (themeTextCheckService.checkDuplicate(ConvertUtil.convert(themeEntity).getTextContent())) {
throw new BizException("圈子里已存在相似内容,请勿重复发布");
}
// 腾讯云敏感词校验 // 腾讯云敏感词校验
checkContent(req); checkContent(req);
themeEntity.setContent(JsonUtil.toJson(req.getContent())); themeEntity.setContent(JsonUtil.toJson(req.getContent()));
...@@ -219,6 +227,7 @@ public class ThemeManager { ...@@ -219,6 +227,7 @@ public class ThemeManager {
if (StringUtils.isBlank(req.getEditThemeId())) { if (StringUtils.isBlank(req.getEditThemeId())) {
// 新建 // 新建
themeService.insertTheme(themeEntity); themeService.insertTheme(themeEntity);
} else { } else {
// 修改 // 修改
themeService.update(themeEntity, req.getEditThemeId()); themeService.update(themeEntity, req.getEditThemeId());
...@@ -233,12 +242,13 @@ public class ThemeManager { ...@@ -233,12 +242,13 @@ public class ThemeManager {
} }
themeAttachmentService.insertList(themeAttachments); themeAttachmentService.insertList(themeAttachments);
ESThemeQo esThemeQo = ConvertUtil.convert(themeEntity);
try { try {
esService.insertOrUpdateTheme(ConvertUtil.convert(themeEntity)); esService.insertOrUpdateTheme(esThemeQo);
} catch (Exception e) { } catch (Exception e) {
log.error("error in save theme to ES. themeId:{}, error:{}", themeEntity.getThemeId(), ExceptionUtils.getStackTrace(e)); log.error("error in save theme to ES. themeId:{}, error:{}", themeEntity.getThemeId(), ExceptionUtils.getStackTrace(e));
} }
themeTextCheckService.insert(esThemeQo.getTextContent(), themeEntity.getThemeId(), userId, themeEntity.getThemeType());
redisCache.evict(StringUtils.joinWith("_", CACHE_THEME_ID, themeEntity.getThemeId())); redisCache.evict(StringUtils.joinWith("_", CACHE_THEME_ID, themeEntity.getThemeId()));
...@@ -247,8 +257,8 @@ public class ThemeManager { ...@@ -247,8 +257,8 @@ public class ThemeManager {
if (1 == req.getSyncToNewComm()) { if (1 == req.getSyncToNewComm()) {
CommonResp response = synchronizeToNewsFeed(req, themeEntity.getThemeId(), userId); CommonResp response = synchronizeToNewsFeed(req, themeEntity.getThemeId(), userId);
if (response.isNotSuccess()) { if (response.isNotSuccess()) {
if ("8001".equals(response.getCode())) { if ("8001".equals(response.getCode()) || ErrorCodeConstant.THEME_SYNCHRONIZE_FAILED.getCode().equals(response.getCode())) {
// 内容受限,不滚发布 // 内容受限,不滚发布
return CommonResp.error(ErrorCodeConstant.THEME_SYNCHRONIZE_FAILED.getCode(), "发布成功,同步失败:" + response.getMsg(), themeResp); return CommonResp.error(ErrorCodeConstant.THEME_SYNCHRONIZE_FAILED.getCode(), "发布成功,同步失败:" + response.getMsg(), themeResp);
} else { } else {
// 其他回滚异常 // 其他回滚异常
...@@ -283,11 +293,12 @@ public class ThemeManager { ...@@ -283,11 +293,12 @@ public class ThemeManager {
imgList.forEach(img -> { imgList.forEach(img -> {
feedList.add(convertImg(img, userId)); feedList.add(convertImg(img, userId));
}); });
} else if (RelTypeEnum.OFFLINE_ACTIVITY.type.equals(themeContentReq.getType())) {
// throw new BizException("线下活动暂时无法同步到专栏");
return CommonResp.error(ErrorCodeConstant.THEME_SYNCHRONIZE_FAILED.getCode(), "线下活动无法同步");
} else { } else {
//其他类型的附件 //其他类型的附件
if (RelTypeEnum.OFFLINE_ACTIVITY.type.equals(themeContentReq.getType())){
throw new BizException("线下活动暂时无法同步到专栏");
}
feedList.add(NewsFeedResReq.builder().relType(Integer.parseInt(themeContentReq.getType())) feedList.add(NewsFeedResReq.builder().relType(Integer.parseInt(themeContentReq.getType()))
.relId(themeContentReq.getValue()) .relId(themeContentReq.getValue())
.productType(themeContentReq.getProductType()) .productType(themeContentReq.getProductType())
...@@ -750,7 +761,7 @@ public class ThemeManager { ...@@ -750,7 +761,7 @@ public class ThemeManager {
// 逻辑删除主题,校验用户 // 逻辑删除主题,校验用户
public void delete(String themeId, String userId) { public void delete(String themeId, String userId) {
themeService.deleteById(themeId, userId); themeService.deleteById(themeId, userId);
themeTextCheckService.deleteByThemeId(themeId);
this.evictThemeCache(themeId); this.evictThemeCache(themeId);
} }
...@@ -939,4 +950,21 @@ public class ThemeManager { ...@@ -939,4 +950,21 @@ public class ThemeManager {
redisCache.evict(StringUtils.joinWith("_", CACHE_THEME_ID, themeId)); redisCache.evict(StringUtils.joinWith("_", CACHE_THEME_ID, themeId));
} }
/**
 * Builds duplicate-check records for the themes returned by
 * {@code themeService.queryLatestThemes(30)}.
 *
 * <p>Each theme is converted to a {@code ThemeQo}; every content item whose
 * type equals {@code RelTypeEnum.TEXT.type} is passed to
 * {@code themeTextCheckService.insertInit} together with the theme id, the
 * author id and the theme's creation time.
 *
 * <p>Themes without a content list, and text items without a value, are
 * skipped so that one incomplete record cannot abort (and roll back) the
 * whole run.
 */
@Transactional
public void initThemeTextCheck() {
    List<ThemeEntity> themeEntities = themeService.queryLatestThemes(30);
    List<ThemeQo> themeQos = ConvertUtil.themeEntitiesToDTOs(themeEntities);
    for (ThemeQo themeQo : themeQos) {
        List<ThemeContentQo> content = themeQo.getContent();
        if (content == null) {
            continue;
        }
        for (ThemeContentQo themeContentQo : content) {
            // Constant-first comparison: a content item with a null type is skipped instead of throwing.
            if (RelTypeEnum.TEXT.type.equals(themeContentQo.getType()) && themeContentQo.getValue() != null) {
                themeTextCheckService.insertInit(themeContentQo.getValue(), themeQo.getThemeId(), themeQo.getAuthorId(), TimeUtils.getDateTimeOfTimestamp(themeQo.getCreateTime()));
            }
        }
    }
}
} }
package com.tanpu.community.service; package com.tanpu.community.service;
import com.tanpu.community.dao.entity.community.ThemeTextFilterEntity; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.tanpu.community.dao.mapper.community.ThemeTextFilterMapper; import com.tanpu.common.constant.BizStatus;
import com.tanpu.common.exception.BizException;
import com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity;
import com.tanpu.community.dao.mapper.community.ThemeCheckDuplicateMapper;
import org.springframework.stereotype.Service;
import javax.annotation.Resource; import javax.annotation.Resource;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
@Service
public class ThemeTextCheckService { public class ThemeTextCheckService {
@Resource @Resource
private ThemeTextFilterMapper themeTextFilterMapper; private ThemeCheckDuplicateMapper themeCheckDuplicateMapper;
public void insert(){
ThemeTextFilterEntity build = ThemeTextFilterEntity.builder().build(); public void insert(String content, String themeId, String userId,Integer themeType) {
themeTextFilterMapper.insert(build); themeCheckDuplicateMapper.deleteByThemeId(themeId);
content = content.replaceAll("[,。!? ]", "。");
String[] split = content.split("。");
for (int i = 0; i < split.length; i++) {
getMD5(split[i]);
insert(themeId, split[i], getMD5(split[i]), i + 1, split.length,userId);
}
}
/**
 * Removes the duplicate-check rows of a theme by delegating to
 * {@code themeCheckDuplicateMapper.deleteByThemeId(themeId)}.
 *
 * @param themeId id of the theme whose rows are targeted
 */
public void deleteByThemeId(String themeId){
themeCheckDuplicateMapper.deleteByThemeId(themeId);
}
/**
 * Stores a single text segment of a theme as one duplicate-check row.
 *
 * @param themeId       id of the theme the segment belongs to
 * @param partition     segment text, stored as {@code partitionText}
 * @param partitionHash hash of the segment
 * @param num           position of the segment, stored as {@code partitionNum}
 * @param total         number of segments, stored as {@code totalParts}
 * @param userId        id stored as {@code userId}
 */
public void insert(String themeId, String partition, String partitionHash, Integer num, Integer total,String userId) {
    themeCheckDuplicateMapper.insert(
            ThemeCheckDuplicateEntity.builder()
                    .themeId(themeId)
                    .userId(userId)
                    .partitionText(partition)
                    .partitionHash(partitionHash)
                    .partitionNum(num)
                    .totalParts(total)
                    .build());
}
/**
 * Reports whether {@code content} largely repeats the text of a stored theme.
 *
 * <p>The content is cut into segments at the separator characters, each
 * segment is hashed with {@link #getMD5(String)}, and the mapper returns the
 * largest number of non-deleted rows of a single theme whose hash is among
 * those segment hashes. The content counts as a duplicate when that number
 * reaches at least 80% of the segment count.
 *
 * @param content text to check; {@code null} or separator-only text is never
 *                reported as a duplicate
 * @return {@code true} when some stored theme matches at least 80% of the segments
 */
public boolean checkDuplicate(String content) {
    if (content == null) {
        return false;
    }
    String[] split = content.replaceAll("[,。!? ]", "。").split("。");
    // split() drops trailing empty strings, so separator-only (or empty-after-trim) input can
    // yield zero segments; querying then would build an empty IN list and a 0-segment
    // threshold that any count satisfies.
    if (split.length == 0) {
        return false;
    }
    List<String> list = new ArrayList<>(split.length);
    for (String part : split) {
        list.add(getMD5(part));
    }
    LambdaQueryWrapper<ThemeCheckDuplicateEntity> w = new LambdaQueryWrapper<ThemeCheckDuplicateEntity>()
            .in(ThemeCheckDuplicateEntity::getPartitionHash, list)
            .eq(ThemeCheckDuplicateEntity::getDeleteTag, BizStatus.DeleteTag.tag_init)
            .groupBy(ThemeCheckDuplicateEntity::getThemeId);
    Integer check = themeCheckDuplicateMapper.check(w);
    // Integer form of check / segments >= 0.8.
    return check != null && check * 10 >= split.length * 8;
}
/**
 * Returns the MD5 digest of {@code str} as a lowercase hexadecimal string.
 *
 * <p>The text is encoded as UTF-8 before hashing, so the result no longer
 * depends on the JVM's default charset. The hex form is produced through
 * {@link BigInteger#toString(int)}, which omits leading zero digits, so the
 * result can be shorter than 32 characters; that format is kept as is so the
 * output shape stays the same as before.
 *
 * @param str text to hash
 * @return hexadecimal representation of the MD5 digest
 * @throws BizException if the digest cannot be computed
 */
public static String getMD5(String str) {
    try {
        MessageDigest md = MessageDigest.getInstance("MD5");
        // Explicit charset: getBytes() without one uses the platform default, which would make
        // hashes of non-ASCII text differ between machines.
        md.update(str.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        // digest() yields the 16-byte MD5 value; BigInteger with signum 1 renders it as an
        // unsigned hexadecimal number.
        return new BigInteger(1, md.digest()).toString(16);
    } catch (Exception e) {
        throw new BizException("MD5加密出现错误");
    }
}
public void insertInit(String content, String themeId,String userId, LocalDateTime createTime) {
themeCheckDuplicateMapper.deleteByThemeId(themeId);
content = content.replaceAll("[,。!? ]", "。");
String[] split = content.split("。");
for (int i = 0; i < split.length; i++) {
getMD5(split[i]);
ThemeCheckDuplicateEntity build = ThemeCheckDuplicateEntity.builder()
.themeId(themeId)
.partitionText(split[i])
.partitionHash(getMD5(split[i]))
.partitionNum(i+1)
.totalParts(split.length)
.userId(userId)
.createTime(createTime)
.updateTime(createTime)
.build();
themeCheckDuplicateMapper.insert(build);
}
} }
} }
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd"> <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.tanpu.community.dao.mapper.community.ThemeTextFilterMapper"> <mapper namespace="com.tanpu.community.dao.mapper.community.ThemeCheckDuplicateMapper">
<!-- 通用查询映射结果 --> <!-- 通用查询映射结果 -->
<resultMap id="BaseResultMap" type="com.tanpu.community.dao.entity.community.ThemeTextFilterEntity"> <resultMap id="BaseResultMap" type="com.tanpu.community.dao.entity.community.ThemeCheckDuplicateEntity">
<id column="id" property="id" /> <id column="id" property="id" />
<result column="theme_id" property="themeId" /> <result column="theme_id" property="themeId" />
<result column="theme_type" property="themeType" /> <result column="theme_type" property="themeType" />
<result column="partition" property="partition" /> <result column="partition_text" property="partitionText" />
<result column="partition_hash" property="partitionHash" /> <result column="partition_hash" property="partitionHash" />
<result column="partition_num" property="partitionNum" /> <result column="partition_num" property="partitionNum" />
<result column="total_parts" property="totalParts" /> <result column="total_parts" property="totalParts" />
<result column="user_id" property="userId" />
<result column="create_time" property="createTime" /> <result column="create_time" property="createTime" />
<result column="update_time" property="updateTime" /> <result column="update_time" property="updateTime" />
<result column="delete_tag" property="deleteTag" /> <result column="delete_tag" property="deleteTag" />
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment