headless integrates knowledge (#722)

This commit is contained in:
daikon
2024-02-05 20:30:57 +08:00
committed by GitHub
parent 74d0ec2b23
commit 9600456bae
174 changed files with 1908 additions and 1817 deletions

View File

@@ -0,0 +1,22 @@
package com.tencent.supersonic.headless.server.persistence.dataobject;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import java.util.Date;
@Data
@TableName("s2_dictionary_conf")
public class DictConfDO {
@TableId(type = IdType.AUTO)
private Long id;
private String description;
private String type;
private Long itemId;
private String config;
private String status;
private Date createdAt;
private String createdBy;
}

View File

@@ -0,0 +1,25 @@
package com.tencent.supersonic.headless.server.persistence.dataobject;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import java.util.Date;
@Data
@TableName("s2_dictionary_task")
public class DictTaskDO {
@TableId(type = IdType.AUTO)
private Long id;
private String name;
private String description;
private String type;
private Long itemId;
private String config;
private String status;
private Date createdAt;
private String createdBy;
private Long elapsedMs;
}

View File

@@ -0,0 +1,9 @@
package com.tencent.supersonic.headless.server.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
import org.apache.ibatis.annotations.Mapper;
@Mapper
public interface DictConfMapper extends BaseMapper<DictConfDO> {
}

View File

@@ -0,0 +1,11 @@
package com.tencent.supersonic.headless.server.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
import org.apache.ibatis.annotations.Mapper;
@Mapper
public interface DictTaskMapper extends BaseMapper<DictTaskDO> {
}

View File

@@ -0,0 +1,29 @@
package com.tencent.supersonic.headless.server.persistence.repository;
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
import java.util.List;
public interface DictRepository {
Long addDictConf(DictConfDO dictConfDO);
Long editDictConf(DictConfDO dictConfDO);
List<DictItemResp> queryDictConf(DictItemFilter dictItemFilter);
Long addDictTask(DictTaskDO dictTaskDO);
Long editDictTask(DictTaskDO dictTaskDO);
DictTaskDO queryDictTask(DictItemFilter filter);
DictTaskDO queryDictTaskById(Long id);
DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq);
}

View File

@@ -0,0 +1,142 @@
package com.tencent.supersonic.headless.server.persistence.repository.impl;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
import com.tencent.supersonic.headless.api.pojo.response.DimensionResp;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.headless.server.persistence.mapper.DictConfMapper;
import com.tencent.supersonic.headless.server.persistence.mapper.DictTaskMapper;
import com.tencent.supersonic.headless.server.persistence.repository.DictRepository;
import com.tencent.supersonic.headless.server.service.DimensionService;
import com.tencent.supersonic.headless.server.utils.DictUtils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Repository;
import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.Objects;
@Slf4j
@Repository
public class DictRepositoryImpl implements DictRepository {
private final DictTaskMapper dictTaskMapper;
private final DictConfMapper dictConfMapper;
private final DictUtils dictConverter;
private final DimensionService dimensionService;
public DictRepositoryImpl(DictTaskMapper dictTaskMapper, DictConfMapper dictConfMapper,
DictUtils dictConverter, DimensionService dimensionService) {
this.dictTaskMapper = dictTaskMapper;
this.dictConfMapper = dictConfMapper;
this.dictConverter = dictConverter;
this.dimensionService = dimensionService;
}
@Override
public Long addDictTask(DictTaskDO dictTaskDO) {
dictTaskMapper.insert(dictTaskDO);
return dictTaskDO.getId();
}
@Override
public Long editDictTask(DictTaskDO dictTaskDO) {
dictTaskMapper.updateById(dictTaskDO);
return dictTaskDO.getId();
}
@Override
public DictTaskDO queryDictTask(DictItemFilter filter) {
QueryWrapper<DictTaskDO> wrapper = new QueryWrapper<>();
if (Objects.nonNull(filter.getItemId())) {
wrapper.lambda().eq(DictTaskDO::getItemId, filter.getItemId());
}
if (Objects.nonNull(filter.getType())) {
wrapper.lambda().eq(DictTaskDO::getType, filter.getType());
}
if (Objects.nonNull(filter.getId())) {
wrapper.lambda().eq(DictTaskDO::getId, filter.getId());
}
List<DictTaskDO> dictTaskDOList = dictTaskMapper.selectList(wrapper);
if (CollectionUtils.isEmpty(dictTaskDOList)) {
return null;
}
return dictTaskDOList.get(0);
}
@Override
public DictTaskDO queryDictTaskById(Long id) {
return dictTaskMapper.selectById(id);
}
@Override
public DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq) {
DictTaskResp taskResp = new DictTaskResp();
QueryWrapper<DictTaskDO> wrapper = new QueryWrapper<>();
wrapper.lambda().eq(DictTaskDO::getItemId, taskReq.getItemId());
wrapper.lambda().eq(DictTaskDO::getType, taskReq.getType());
List<DictTaskDO> dictTaskDOList = dictTaskMapper.selectList(wrapper);
if (CollectionUtils.isEmpty(dictTaskDOList)) {
return taskResp;
}
taskResp = dictConverter.taskDO2Resp(dictTaskDOList.get(0));
DimensionResp dimension = dimensionService.getDimension(taskReq.getItemId());
taskResp.setBizName(dimension.getBizName());
taskResp.setModelId(dimension.getModelId());
return taskResp;
}
@Override
public Long addDictConf(DictConfDO dictConfDO) {
dictConfMapper.insert(dictConfDO);
return dictConfDO.getId();
}
@Override
public Long editDictConf(DictConfDO dictConfDO) {
DictItemFilter filter = DictItemFilter.builder().type(TypeEnums.valueOf(dictConfDO.getType()))
.itemId(dictConfDO.getItemId())
.status(StatusEnum.ONLINE)
.build();
List<DictConfDO> dictConfDOList = getDictConfDOList(filter);
if (CollectionUtils.isEmpty(dictConfDOList)) {
return -1L;
}
dictConfDO.setId(dictConfDOList.get(0).getId());
dictConfMapper.updateById(dictConfDO);
return dictConfDO.getId();
}
@Override
public List<DictItemResp> queryDictConf(DictItemFilter dictItemFilter) {
List<DictConfDO> dictConfDOList = getDictConfDOList(dictItemFilter);
return dictConverter.dictDOList2Req(dictConfDOList);
}
private List<DictConfDO> getDictConfDOList(DictItemFilter dictItemFilter) {
QueryWrapper<DictConfDO> wrapper = new QueryWrapper<>();
if (Objects.nonNull(dictItemFilter.getId())) {
wrapper.lambda().eq(DictConfDO::getId, dictItemFilter.getId());
}
if (Objects.nonNull(dictItemFilter.getType())) {
wrapper.lambda().eq(DictConfDO::getType, dictItemFilter.getType().name());
}
if (Objects.nonNull(dictItemFilter.getItemId())) {
wrapper.lambda().eq(DictConfDO::getItemId, dictItemFilter.getItemId());
}
if (Objects.nonNull(dictItemFilter.getStatus())) {
wrapper.lambda().eq(DictConfDO::getStatus, dictItemFilter.getStatus().name());
}
List<DictConfDO> dictConfDOList = dictConfMapper.selectList(wrapper);
return dictConfDOList;
}
}

View File

@@ -0,0 +1,132 @@
package com.tencent.supersonic.headless.server.rest;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.auth.api.authentication.utils.UserHolder;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.validation.Valid;
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
import com.tencent.supersonic.headless.server.service.DictConfService;
import com.tencent.supersonic.headless.server.service.DictTaskService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.List;
@RestController
@RequestMapping("/api/semantic/knowledge")
public class KnowledgeController {
@Autowired
private DictTaskService taskService;
@Autowired
private DictConfService confService;
/**
* addDictConf-新增item的字典配置
* Add configuration information for dictionary entries
*
* @param dictItemReq
*/
@PostMapping("/conf")
public Long addDictConf(@RequestBody @Valid DictItemReq dictItemReq,
HttpServletRequest request,
HttpServletResponse response) {
User user = UserHolder.findUser(request, response);
return confService.addDictConf(dictItemReq, user);
}
/**
* editDictConf-编辑item的字典配置
* Edit configuration information for dictionary entries
*
* @param dictItemReq
*/
@PutMapping("/conf")
public Long editDictConf(@RequestBody @Valid DictItemReq dictItemReq,
HttpServletRequest request,
HttpServletResponse response) {
User user = UserHolder.findUser(request, response);
return confService.editDictConf(dictItemReq, user);
}
/**
* queryDictConf-查询item的字典配置
* query configuration information for dictionary entries
*
* @param filter
*/
@PostMapping("/conf/query")
public List<DictItemResp> queryDictConf(@RequestBody @Valid DictItemFilter filter,
HttpServletRequest request,
HttpServletResponse response) {
User user = UserHolder.findUser(request, response);
return confService.queryDictConf(filter, user);
}
/**
* addDictTask-实时导入一个item的字典数据
* write specific item values to the knowledge base
*
* @param taskReq
*/
@PostMapping("/task")
public Long addDictTask(@RequestBody DictSingleTaskReq taskReq,
HttpServletRequest request,
HttpServletResponse response) {
User user = UserHolder.findUser(request, response);
return taskService.addDictTask(taskReq, user);
}
/**
* deleteDictTask-实时删除某一个item的字典数据
* remove specific item values from the knowledge base
*
* @param taskReq
*/
@PutMapping("/task/delete")
public Long deleteDictTask(@RequestBody DictSingleTaskReq taskReq,
HttpServletRequest request,
HttpServletResponse response) {
User user = UserHolder.findUser(request, response);
return taskService.deleteDictTask(taskReq, user);
}
/**
* dailyDictTask-手动离线更新所以字典
*/
@PutMapping("/task/all")
public Boolean dailyDictTask(
HttpServletRequest request,
HttpServletResponse response) {
return taskService.dailyDictTask();
}
/**
* queryLatestDictTask-返回最新的字典任务执行情况
*
* @param taskReq
*/
@PostMapping("/task/search")
public DictTaskResp queryLatestDictTask(@RequestBody DictSingleTaskReq taskReq,
HttpServletRequest request,
HttpServletResponse response) {
User user = UserHolder.findUser(request, response);
return taskService.queryLatestDictTask(taskReq, user);
}
}

View File

@@ -0,0 +1,20 @@
package com.tencent.supersonic.headless.server.service;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import java.util.List;
/**
* Make relevant settings for the dictionary
*/
public interface DictConfService {
Long addDictConf(DictItemReq itemValueReq, User user);
Long editDictConf(DictItemReq itemValueReq, User user);
List<DictItemResp> queryDictConf(DictItemFilter dictItemFilter, User user);
}

View File

@@ -0,0 +1,18 @@
package com.tencent.supersonic.headless.server.service;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
/**
* Manage dictionary tasks
*/
public interface DictTaskService {
Long addDictTask(DictSingleTaskReq taskReq, User user);
Long deleteDictTask(DictSingleTaskReq taskReq, User user);
Boolean dailyDictTask();
DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq, User user);
}

View File

@@ -0,0 +1,25 @@
package com.tencent.supersonic.headless.server.service;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import com.tencent.supersonic.headless.core.knowledge.HanlpMapResult;
import java.util.List;
import java.util.Set;
public interface KnowledgeService {
List<S2Term> getTerms(String text);
List<HanlpMapResult> prefixSearch(String key, int limit, Set<Long> viewIds);
List<HanlpMapResult> suffixSearch(String key, int limit, Set<Long> viewIds);
void updateSemanticKnowledge(List<DictWord> natures);
void reloadAllData(List<DictWord> natures);
void updateOnlineKnowledge(List<DictWord> natures);
}

View File

@@ -0,0 +1,50 @@
package com.tencent.supersonic.headless.server.service.impl;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.headless.server.persistence.repository.DictRepository;
import com.tencent.supersonic.headless.server.service.DictConfService;
import com.tencent.supersonic.headless.server.utils.DictUtils;
import org.springframework.stereotype.Service;
import java.util.List;
@Service
public class DictConfServiceImpl implements DictConfService {
private final DictRepository dictRepository;
private final DictUtils dictConverter;
public DictConfServiceImpl(DictRepository dictRepository,
DictUtils dictConverter) {
this.dictRepository = dictRepository;
this.dictConverter = dictConverter;
}
@Override
public Long addDictConf(DictItemReq itemValueReq, User user) {
DictConfDO dictConfDO = dictConverter.generateDictConfDO(itemValueReq, user);
return dictRepository.addDictConf(dictConfDO);
}
@Override
public Long editDictConf(DictItemReq itemValueReq, User user) {
DictConfDO dictConfDO = dictConverter.generateDictConfDO(itemValueReq, user);
dictRepository.editDictConf(dictConfDO);
if (StatusEnum.DELETED.equals(itemValueReq.getStatus())) {
// todo delete dict file and refresh
}
return itemValueReq.getItemId();
}
@Override
public List<DictItemResp> queryDictConf(DictItemFilter dictItemFilter, User user) {
return dictRepository.queryDictConf(dictItemFilter);
}
}

View File

@@ -0,0 +1,150 @@
package com.tencent.supersonic.headless.server.service.impl;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
import com.tencent.supersonic.headless.core.file.FileHandler;
import com.tencent.supersonic.headless.core.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.headless.server.persistence.repository.DictRepository;
import com.tencent.supersonic.headless.server.service.DictTaskService;
import com.tencent.supersonic.headless.server.service.KnowledgeService;
import com.tencent.supersonic.headless.server.utils.DictUtils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
@Service
@Slf4j
public class DictTaskServiceImpl implements DictTaskService {
@Value("${dict.flush.enable:true}")
private Boolean dictFlushEnable;
@Value("${dict.flush.daily.enable:true}")
private Boolean dictFlushDailyEnable;
@Value("${dict.file.type:txt}")
private String dictFileType;
private String dimValue = "DimValue_%d_%d";
private final DictRepository dictRepository;
private final DictUtils dictConverter;
private final DictUtils dictUtils;
private final FileHandler fileHandler;
private final KnowledgeService knowledgeService;
public DictTaskServiceImpl(DictRepository dictRepository,
DictUtils dictConverter,
DictUtils dictUtils,
FileHandler fileHandler,
KnowledgeService knowledgeService) {
this.dictRepository = dictRepository;
this.dictConverter = dictConverter;
this.dictUtils = dictUtils;
this.fileHandler = fileHandler;
this.knowledgeService = knowledgeService;
}
@Override
public Long addDictTask(DictSingleTaskReq taskReq, User user) {
if (!dictFlushEnable) {
return 0L;
}
DictItemResp dictItemResp = fetchDictItemResp(taskReq);
return handleDictTaskByItemResp(dictItemResp, user);
}
private Long handleDictTaskByItemResp(DictItemResp dictItemResp, User user) {
DictTaskDO dictTaskDO = dictConverter.generateDictTaskDO(dictItemResp, user);
log.info("[addDictTask] dictTaskDO:{}", dictTaskDO);
dictRepository.addDictTask(dictTaskDO);
Long idInDb = dictTaskDO.getId();
dictItemResp.setId(idInDb);
runDictTask(dictItemResp, user);
return idInDb;
}
private DictItemResp fetchDictItemResp(DictSingleTaskReq taskReq) {
DictItemFilter dictItemFilter = DictItemFilter.builder()
.itemId(taskReq.getItemId())
.type(taskReq.getType())
.build();
List<DictItemResp> dictItemRespList = dictRepository.queryDictConf(dictItemFilter);
if (!CollectionUtils.isEmpty(dictItemRespList)) {
return dictItemRespList.get(0);
}
return null;
}
private void runDictTask(DictItemResp dictItemResp, User user) {
if (Objects.isNull(dictItemResp)) {
return;
}
DictTaskDO dictTaskDO = dictRepository.queryDictTaskById(dictItemResp.getId());
dictTaskDO.setStatus(TaskStatusEnum.RUNNING.getStatus());
dictRepository.editDictTask(dictTaskDO);
// 1.生成item字典数据
List<String> data = dictUtils.fetchItemValue(dictItemResp);
// 2.变更字典文件
String fileName = dictItemResp.fetchDictFileName() + Constants.DOT + dictFileType;
fileHandler.writeFile(data, fileName, false);
// 3.实时变更内存中字典数据
try {
HanlpHelper.reloadCustomDictionary();
dictTaskDO.setStatus(TaskStatusEnum.SUCCESS.getStatus());
dictRepository.editDictTask(dictTaskDO);
} catch (IOException e) {
log.error("reloadCustomDictionary error", e);
}
}
@Override
public Long deleteDictTask(DictSingleTaskReq taskReq, User user) {
DictItemResp dictItemResp = fetchDictItemResp(taskReq);
String fileName = dictItemResp.fetchDictFileName() + Constants.DOT + dictFileType;
fileHandler.deleteDictFile(fileName);
try {
HanlpHelper.reloadCustomDictionary();
} catch (Exception e) {
log.error("reloadCustomDictionary error", e);
}
return 0L;
}
@Override
@Scheduled(cron = "${knowledge.dimension.value.cron:0 0 0 * * ?}")
public Boolean dailyDictTask() {
log.info("[dailyDictTask] start");
if (!dictFlushDailyEnable) {
log.info("dictFlushDailyEnable is false, now finish dailyDictTask");
}
DictItemFilter filter = DictItemFilter.builder().status(StatusEnum.ONLINE).build();
List<DictItemResp> dictItemRespList = dictRepository.queryDictConf(filter);
dictItemRespList.stream().forEach(item -> handleDictTaskByItemResp(item, null));
log.info("[dailyDictTask] finish");
return true;
}
@Override
public DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq, User user) {
return dictRepository.queryLatestDictTask(taskReq);
}
}

View File

@@ -0,0 +1,107 @@
package com.tencent.supersonic.headless.server.service.impl;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.api.pojo.response.ViewResp;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import com.tencent.supersonic.headless.core.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.core.knowledge.SearchService;
import com.tencent.supersonic.headless.core.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.server.pojo.MetaFilter;
import com.tencent.supersonic.headless.server.service.KnowledgeService;
import com.tencent.supersonic.headless.server.service.ViewService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
@Service
@Slf4j
public class KnowledgeServiceImpl implements KnowledgeService {
private final ViewService viewService;
public KnowledgeServiceImpl(ViewService viewService) {
this.viewService = viewService;
}
@Override
public void updateSemanticKnowledge(List<DictWord> natures) {
List<DictWord> prefixes = natures.stream()
.filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
.collect(Collectors.toList());
for (DictWord nature : prefixes) {
HanlpHelper.addToCustomDictionary(nature);
}
List<DictWord> suffixes = natures.stream()
.filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
.collect(Collectors.toList());
SearchService.loadSuffix(suffixes);
}
@Override
public void reloadAllData(List<DictWord> natures) {
// 1. reload custom knowledge
try {
HanlpHelper.reloadCustomDictionary();
} catch (Exception e) {
log.error("reloadCustomDictionary error", e);
}
// 2. update online knowledge
updateOnlineKnowledge(natures);
}
@Override
public void updateOnlineKnowledge(List<DictWord> natures) {
try {
updateSemanticKnowledge(natures);
} catch (Exception e) {
log.error("updateSemanticKnowledge error", e);
}
}
@Override
public List<S2Term> getTerms(String text) {
return HanlpHelper.getTerms(text);
}
@Override
public List<HanlpMapResult> prefixSearch(String key, int limit, Set<Long> viewIds) {
return prefixSearch(key, limit, viewIds2ModelIdList(viewIds));
}
public List<HanlpMapResult> prefixSearchByModel(String key, int limit, Set<Long> models) {
return SearchService.prefixSearch(key, limit, models);
}
@Override
public List<HanlpMapResult> suffixSearch(String key, int limit, Set<Long> viewIds) {
return suffixSearch(key, limit, viewIds2ModelIdList(viewIds));
}
public List<HanlpMapResult> suffixSearchByModel(String key, int limit, Set<Long> models) {
return SearchService.suffixSearch(key, limit, models);
}
private Set<Long> viewIds2ModelIdList(Set<Long> viewIds) {
Set<Long> modelIds = new HashSet<>();
MetaFilter filter = new MetaFilter();
filter.setIds(new ArrayList<>(viewIds));
List<ViewResp> viewList = viewService.getViewList(filter);
if (CollectionUtils.isEmpty(viewList)) {
return modelIds;
}
viewList.stream().forEach(view -> modelIds.addAll(view.getAllModels()));
return modelIds;
}
}

View File

@@ -0,0 +1,356 @@
package com.tencent.supersonic.headless.server.utils;
import com.google.common.base.Strings;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.Aggregator;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.DateConf;
import com.tencent.supersonic.common.pojo.Filter;
import com.tencent.supersonic.common.pojo.Order;
import com.tencent.supersonic.common.pojo.enums.AggOperatorEnum;
import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum;
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.headless.api.pojo.Dim;
import com.tencent.supersonic.headless.api.pojo.ItemValueConfig;
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
import com.tencent.supersonic.headless.api.pojo.request.QuerySqlReq;
import com.tencent.supersonic.headless.api.pojo.request.QueryStructReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
import com.tencent.supersonic.headless.api.pojo.response.DimensionResp;
import com.tencent.supersonic.headless.api.pojo.response.MetricResp;
import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
import com.tencent.supersonic.headless.api.pojo.response.SemanticQueryResp;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.headless.server.service.DimensionService;
import com.tencent.supersonic.headless.server.service.MetricService;
import com.tencent.supersonic.headless.server.service.ModelService;
import com.tencent.supersonic.headless.server.service.QueryService;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.StringJoiner;
import static com.tencent.supersonic.common.pojo.Constants.AND_UPPER;
import static com.tencent.supersonic.common.pojo.Constants.APOSTROPHE;
import static com.tencent.supersonic.common.pojo.Constants.COMMA;
import static com.tencent.supersonic.common.pojo.Constants.SPACE;
@Component
public class DictUtils {
private static String dateTimeFormatter = "yyyyMMddHHmmss";
@Value("${dimension.multi.value.split:#}")
private String dimMultiValueSplit;
@Value("${item.value.max.count:100000}")
private Long itemValueMaxCount;
@Value("${item.value.white.frequency:999999}")
private Long itemValueWhiteFrequency;
@Value("${item.value.date.start:1}")
private Integer itemValueDateStart;
@Value("${item.value.date.end:1}")
private Integer itemValueDateEnd;
private final DimensionService dimensionService;
private final MetricService metricService;
private final QueryService queryService;
private final ModelService modelService;
public DictUtils(DimensionService dimensionService,
MetricService metricService,
QueryService queryService,
ModelService modelService) {
this.dimensionService = dimensionService;
this.metricService = metricService;
this.queryService = queryService;
this.modelService = modelService;
}
public String fetchDictFileName(DictItemResp dictItemResp) {
return String.format("dic_value_%d_%s_%s", dictItemResp.getModelId(), dictItemResp.getType().name(),
dictItemResp.getItemId());
}
public DictTaskDO generateDictTaskDO(DictItemResp dictItemResp, User user) {
DictTaskDO taskDO = new DictTaskDO();
Date createAt = new Date();
String name = dictItemResp.fetchDictFileName();
taskDO.setName(name);
taskDO.setType(dictItemResp.getType().name());
taskDO.setItemId(dictItemResp.getItemId());
taskDO.setConfig(JsonUtil.toString(dictItemResp.getConfig()));
taskDO.setStatus(TaskStatusEnum.PENDING.getStatus());
taskDO.setCreatedAt(createAt);
String creator = (Objects.isNull(user) || Strings.isNullOrEmpty(user.getName())) ? "" : user.getName();
taskDO.setCreatedBy(creator);
return taskDO;
}
public DictConfDO generateDictConfDO(DictItemReq itemValueReq, User user) {
DictConfDO confDO = new DictConfDO();
BeanUtils.copyProperties(itemValueReq, confDO);
confDO.setType(itemValueReq.getType().name());
confDO.setConfig(JsonUtil.toString(itemValueReq.getConfig()));
Date createAt = new Date();
confDO.setCreatedAt(createAt);
String creator = Strings.isNullOrEmpty(user.getName()) ? "" : user.getName();
confDO.setCreatedBy(creator);
confDO.setStatus(itemValueReq.getStatus().name());
return confDO;
}
public List<DictItemResp> dictDOList2Req(List<DictConfDO> dictConfDOList) {
List<DictItemResp> dictItemReqList = new ArrayList<>();
dictConfDOList.stream().forEach(conf -> dictItemReqList.add(dictDO2Req(conf)));
return dictItemReqList;
}
public DictItemResp dictDO2Req(DictConfDO dictConfDO) {
DictItemResp dictItemResp = new DictItemResp();
BeanUtils.copyProperties(dictConfDO, dictItemResp);
dictItemResp.setType(TypeEnums.valueOf(dictConfDO.getType()));
dictItemResp.setConfig(JsonUtil.toObject(dictConfDO.getConfig(), ItemValueConfig.class));
dictItemResp.setStatus(StatusEnum.of(dictConfDO.getStatus()));
if (TypeEnums.DIMENSION.equals(TypeEnums.valueOf(dictConfDO.getType()))) {
DimensionResp dimension = dimensionService.getDimension(dictConfDO.getItemId());
dictItemResp.setModelId(dimension.getModelId());
dictItemResp.setBizName(dimension.getBizName());
}
return dictItemResp;
}
public List<String> fetchItemValue(DictItemResp dictItemResp) {
List<String> lines = new ArrayList<>();
QuerySqlReq querySqlReq = constructQueryReq(dictItemResp);
querySqlReq.setNeedAuth(false);
String bizName = dictItemResp.getBizName();
try {
SemanticQueryResp semanticQueryResp = queryService.queryByReq(querySqlReq, null);
if (Objects.isNull(semanticQueryResp) || CollectionUtils.isEmpty(semanticQueryResp.getResultList())) {
return lines;
}
Map<String, Long> valueAndFrequencyPair = new HashMap<>(2000);
for (Map<String, Object> line : semanticQueryResp.getResultList()) {
if (CollectionUtils.isEmpty(line) || !line.containsKey(bizName)
|| line.get(bizName) == null || line.size() != 2) {
continue;
}
String dimValue = line.get(bizName).toString();
Object metricObject = null;
for (String key : line.keySet()) {
if (!bizName.equalsIgnoreCase(key)) {
metricObject = line.get(key);
}
}
if (!Strings.isNullOrEmpty(dimValue) && Objects.nonNull(metricObject)) {
Long metric = Math.round(Double.parseDouble(metricObject.toString()));
mergeMultivaluedValue(valueAndFrequencyPair, dimValue, metric);
}
}
String nature = dictItemResp.getNature();
constructDictLines(valueAndFrequencyPair, lines, nature);
addWhiteValueLines(dictItemResp, lines, nature);
} catch (Exception e) {
e.printStackTrace();
}
return lines;
}
private void addWhiteValueLines(DictItemResp dictItemResp, List<String> lines, String nature) {
if (Objects.isNull(dictItemResp) || Objects.isNull(dictItemResp.getConfig())
|| CollectionUtils.isEmpty(dictItemResp.getConfig().getWhiteList())) {
return;
}
List<String> whiteList = dictItemResp.getConfig().getWhiteList();
whiteList.forEach(white -> lines.add(String.format("%s %s %s", white, nature, itemValueWhiteFrequency)));
}
private void constructDictLines(Map<String, Long> valueAndFrequencyPair, List<String> lines, String nature) {
if (CollectionUtils.isEmpty(valueAndFrequencyPair)) {
return;
}
valueAndFrequencyPair.forEach((value, frequency) -> {
lines.add(String.format("%s %s %s", value, nature, frequency));
});
}
private void mergeMultivaluedValue(Map<String, Long> valueAndFrequencyPair, String dimValue, Long metric) {
if (org.apache.logging.log4j.util.Strings.isEmpty(dimValue)) {
return;
}
Map<String, Long> tmp = new HashMap<>();
if (dimValue.contains(dimMultiValueSplit)) {
Arrays.stream(dimValue.split(dimMultiValueSplit))
.forEach(dimValueSingle -> tmp.put(dimValueSingle, metric));
} else {
tmp.put(dimValue, metric);
}
for (String value : tmp.keySet()) {
long metricOld = valueAndFrequencyPair.containsKey(value) ? valueAndFrequencyPair.get(value) : 0L;
valueAndFrequencyPair.put(value, metric + metricOld);
}
}
private QuerySqlReq constructQueryReq(DictItemResp dictItemResp) {
if (Objects.nonNull(dictItemResp) && Objects.nonNull(dictItemResp.getConfig())
&& Objects.nonNull(dictItemResp.getConfig().getMetricId())) {
// 查询默认指标
QueryStructReq queryStructReq = generateQueryStruct(dictItemResp);
return queryStructReq.convert(queryStructReq, true);
}
// count(1) 作为指标
return constructQuerySqlReq(dictItemResp);
}
private QuerySqlReq constructQuerySqlReq(DictItemResp dictItemResp) {
String sqlPattern = "select %s,count(1) from tbl %s group by %s order by count(1) desc limit %d";
String bizName = dictItemResp.getBizName();
String whereStr = generateWhereStr(dictItemResp);
String where = Strings.isNullOrEmpty(whereStr) ? "" : "WHERE" + whereStr;
ItemValueConfig config = dictItemResp.getConfig();
Long limit = (Objects.isNull(config) || Objects.isNull(config.getLimit())) ? itemValueMaxCount :
dictItemResp.getConfig().getLimit();
String sql = String.format(sqlPattern, bizName, where, bizName, limit);
Set<Long> modelIds = new HashSet<>();
modelIds.add(dictItemResp.getModelId());
QuerySqlReq querySqlReq = new QuerySqlReq();
querySqlReq.setSql(sql);
querySqlReq.setNeedAuth(false);
querySqlReq.setModelIds(modelIds);
return querySqlReq;
}
private QueryStructReq generateQueryStruct(DictItemResp dictItemResp) {
QueryStructReq queryStructReq = new QueryStructReq();
Set<Long> modelIds = new HashSet<>(Arrays.asList(dictItemResp.getModelId()));
queryStructReq.setModelIds(modelIds);
List<String> groups = new ArrayList<>(Arrays.asList(dictItemResp.getBizName()));
queryStructReq.setGroups(groups);
List<Filter> filters = generateFilters(dictItemResp);
queryStructReq.setDimensionFilters(filters);
List<Aggregator> aggregators = new ArrayList<>();
Long metricId = dictItemResp.getConfig().getMetricId();
MetricResp metric = metricService.getMetric(metricId);
String metricBizName = metric.getBizName();
aggregators.add(new Aggregator(metricBizName, AggOperatorEnum.SUM));
queryStructReq.setAggregators(aggregators);
List<Order> orders = new ArrayList<>();
orders.add(new Order(metricBizName, Constants.DESC_UPPER));
queryStructReq.setOrders(orders);
fillStructDateInfo(queryStructReq, dictItemResp);
Long limit = Objects.isNull(dictItemResp.getConfig().getLimit()) ? itemValueMaxCount :
dictItemResp.getConfig().getLimit();
queryStructReq.setLimit(limit);
queryStructReq.setNeedAuth(false);
return queryStructReq;
}
private void fillStructDateInfo(QueryStructReq queryStructReq, DictItemResp dictItemResp) {
ModelResp model = modelService.getModel(dictItemResp.getModelId());
if (Objects.nonNull(model)) {
List<Dim> timeDims = model.getTimeDimension();
if (!CollectionUtils.isEmpty(timeDims)) {
DateConf dateConf = new DateConf();
dateConf.setDateMode(DateConf.DateMode.BETWEEN);
String format = timeDims.get(0).getDateFormat();
String start = LocalDate.now().minusDays(itemValueDateStart)
.format(DateTimeFormatter.ofPattern(format));
String end = LocalDate.now().minusDays(itemValueDateEnd)
.format(DateTimeFormatter.ofPattern(format));
dateConf.setStartDate(start);
dateConf.setEndDate(end);
queryStructReq.setDateInfo(dateConf);
}
}
}
private List<Filter> generateFilters(DictItemResp dictItemResp) {
List<Filter> filters = new ArrayList<>();
if (Objects.isNull(dictItemResp)) {
return new ArrayList<>();
}
String whereStr = generateWhereStr(dictItemResp);
if (Strings.isNullOrEmpty(whereStr)) {
return new ArrayList<>();
}
Filter filter = new Filter("", FilterOperatorEnum.SQL_PART, whereStr);
filters.add(filter);
return filters;
}
public String generateWhereStr(DictItemResp dictItemResp) {
StringJoiner joiner = new StringJoiner(SPACE + AND_UPPER + SPACE);
String bizName = dictItemResp.getBizName();
ItemValueConfig config = dictItemResp.getConfig();
if (Objects.nonNull(config)) {
if (!CollectionUtils.isEmpty(config.getBlackList())) {
StringJoiner joinerBlack = new StringJoiner(COMMA);
config.getBlackList().stream().forEach(black -> joinerBlack.add(APOSTROPHE + black + APOSTROPHE));
joiner.add(String.format("(%s not in (%s))", bizName, joinerBlack.toString()));
}
if (!CollectionUtils.isEmpty(config.getRuleList())) {
config.getRuleList().stream().forEach(rule -> joiner.add("(" + rule + ")"));
}
}
ModelResp model = modelService.getModel(dictItemResp.getModelId());
if (Objects.nonNull(model)) {
List<Dim> timeDims = model.getTimeDimension();
if (!CollectionUtils.isEmpty(timeDims)) {
String format = timeDims.get(0).getDateFormat();
String start = LocalDate.now().minusDays(itemValueDateStart)
.format(DateTimeFormatter.ofPattern(format));
String end = LocalDate.now().minusDays(itemValueDateEnd)
.format(DateTimeFormatter.ofPattern(format));
joiner.add(String.format("( %s >= '%s' and %s <= '%s' )", TimeDimensionEnum.DAY.getName(), start,
TimeDimensionEnum.DAY.getName(), end));
}
}
return joiner.toString();
}
public DictTaskResp taskDO2Resp(DictTaskDO dictTaskDO) {
DictTaskResp resp = new DictTaskResp();
BeanUtils.copyProperties(dictTaskDO, resp);
resp.setTaskStatus(dictTaskDO.getStatus());
resp.setType(TypeEnums.valueOf(dictTaskDO.getType()));
resp.setConfig(JsonUtil.toObject(dictTaskDO.getConfig(), ItemValueConfig.class));
return resp;
}
}