mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
[improvement](Dict) add dimValueAliasMap info for KnowledgeBaseService
This commit is contained in:
@@ -19,6 +19,7 @@ public class DictWord {
|
||||
private String word;
|
||||
private String nature;
|
||||
private String natureWithFrequency;
|
||||
private String alias;
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
package com.tencent.supersonic.headless.chat.knowledge;
|
||||
|
||||
import com.baomidou.mybatisplus.core.toolkit.CollectionUtils;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@@ -14,6 +17,31 @@ import java.util.stream.Collectors;
|
||||
@Service
|
||||
@Slf4j
|
||||
public class KnowledgeBaseService {
|
||||
private static volatile Map<Long, List<DictWord>> dimValueAliasMap = new HashMap<>();
|
||||
|
||||
public static Map<Long, List<DictWord>> getDimValueAlias() {
|
||||
return dimValueAliasMap;
|
||||
}
|
||||
|
||||
public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) {
|
||||
List<DictWord> dimValueAlias =
|
||||
dimValueAliasMap.containsKey(dimId) ? dimValueAliasMap.get(dimId)
|
||||
: new ArrayList<>();
|
||||
Set<String> wordSet =
|
||||
dimValueAlias
|
||||
.stream().map(word -> String.format("%s_%s_%s",
|
||||
word.getNatureWithFrequency(), word.getWord(), word.getAlias()))
|
||||
.collect(Collectors.toSet());
|
||||
for (DictWord dictWord : newWords) {
|
||||
String key = String.format("%s_%s_%s", dictWord.getNatureWithFrequency(),
|
||||
dictWord.getWord(), dictWord.getAlias());
|
||||
if (!wordSet.contains(key)) {
|
||||
dimValueAlias.add(dictWord);
|
||||
}
|
||||
}
|
||||
dimValueAliasMap.put(dimId, dimValueAlias);
|
||||
return dimValueAlias;
|
||||
}
|
||||
|
||||
public void updateSemanticKnowledge(List<DictWord> natures) {
|
||||
|
||||
@@ -41,6 +69,11 @@ public class KnowledgeBaseService {
|
||||
}
|
||||
|
||||
// 2. update online knowledge
|
||||
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
|
||||
for (Long dimId : dimValueAliasMap.keySet()) {
|
||||
natures.addAll(dimValueAliasMap.get(dimId));
|
||||
}
|
||||
}
|
||||
updateOnlineKnowledge(natures);
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ import com.hankcs.hanlp.dictionary.other.CharTable;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.hankcs.hanlp.utility.LexiconUtility;
|
||||
import com.hankcs.hanlp.utility.TextUtility;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
@@ -103,7 +105,22 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
|
||||
String word = getWordBySpace(param[0]);
|
||||
if (isLetters) {
|
||||
original = word;
|
||||
word = word.toLowerCase();
|
||||
// word = word.toLowerCase();
|
||||
// 加入小写别名
|
||||
if (!original.equals(word.toLowerCase())) {
|
||||
DictWord dictWord = new DictWord();
|
||||
String nature = param[1];
|
||||
dictWord.setNatureWithFrequency(
|
||||
String.format("%s " + Constants.DEFAULT_FREQUENCY, nature));
|
||||
dictWord.setWord(word);
|
||||
dictWord.setAlias(word.toLowerCase());
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length >= 2) {
|
||||
Long dimId = Long.parseLong(
|
||||
nature.split(DictWordType.NATURE_SPILT)[split.length - 1]);
|
||||
KnowledgeBaseService.addDimValueAlias(dimId, Arrays.asList(dictWord));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (natureCount == 0) {
|
||||
attribute = new CoreDictionary.Attribute(defaultNature);
|
||||
|
||||
@@ -8,12 +8,15 @@ import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.chat.ChatQueryContext;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.DatabaseMapResult;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.KnowledgeBaseService;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.builder.BaseWordBuilder;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
|
||||
import com.tencent.supersonic.headless.chat.utils.EditDistanceUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.HashSet;
|
||||
@@ -83,12 +86,32 @@ public class KeywordMapper extends BaseMapper {
|
||||
.element(element).frequency(frequency).word(hanlpMapResult.getName())
|
||||
.similarity(hanlpMapResult.getSimilarity())
|
||||
.detectWord(hanlpMapResult.getDetectWord()).build();
|
||||
|
||||
// doDimValueAliasLogic 将维度值别名进行替换成真实维度值
|
||||
doDimValueAliasLogic(schemaElementMatch);
|
||||
addToSchemaMap(chatQueryContext.getMapInfo(), dataSetId, schemaElementMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void doDimValueAliasLogic(SchemaElementMatch schemaElementMatch) {
|
||||
SchemaElement element = schemaElementMatch.getElement();
|
||||
if (SchemaElementType.VALUE.equals(element.getType())) {
|
||||
Long dimId = element.getId();
|
||||
String word = schemaElementMatch.getWord();
|
||||
Map<Long, List<DictWord>> dimValueAlias = KnowledgeBaseService.getDimValueAlias();
|
||||
if (Objects.nonNull(dimId) && StringUtils.isNotEmpty(word)
|
||||
&& dimValueAlias.containsKey(dimId)) {
|
||||
Map<String, DictWord> aliasAndDictMap = dimValueAlias.get(dimId).stream()
|
||||
.collect(Collectors.toMap(dictWord -> dictWord.getAlias(),
|
||||
dictWord -> dictWord, (v1, v2) -> v2));
|
||||
if (aliasAndDictMap.containsKey(word)) {
|
||||
String wordTech = aliasAndDictMap.get(word).getWord();
|
||||
schemaElementMatch.setWord(wordTech);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void convertMapResultToMapInfo(ChatQueryContext chatQueryContext,
|
||||
List<DatabaseMapResult> mapResults) {
|
||||
for (DatabaseMapResult match : mapResults) {
|
||||
|
||||
@@ -118,6 +118,7 @@ public class DictRepositoryImpl implements DictRepository {
|
||||
wrapper.lambda().and(qw -> qw.like(DictTaskDO::getName, key).or()
|
||||
.like(DictTaskDO::getDescription, key).or().like(DictTaskDO::getConfig, key));
|
||||
}
|
||||
wrapper.lambda().orderByDesc(DictTaskDO::getCreatedAt);
|
||||
return dictTaskMapper.selectList(wrapper);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user