Merge remote-tracking branch 'ssbi/master' into ssmaster

This commit is contained in:
jerryjzhang
2024-12-02 00:00:50 +08:00
5 changed files with 77 additions and 2 deletions

View File

@@ -19,6 +19,7 @@ public class DictWord {
private String word;
private String nature;
private String natureWithFrequency;
private String alias;
@Override
public boolean equals(Object o) {

View File

@@ -1,11 +1,14 @@
package com.tencent.supersonic.headless.chat.knowledge;
import com.baomidou.mybatisplus.core.toolkit.CollectionUtils;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -14,6 +17,31 @@ import java.util.stream.Collectors;
@Service
@Slf4j
public class KnowledgeBaseService {
/**
 * Registry of dimension-value alias entries, keyed by the dimension id passed to
 * {@code addDimValueAlias}. Backed by a plain {@link HashMap}.
 */
private static volatile Map<Long, List<DictWord>> dimValueAliasMap = new HashMap<>();

/**
 * Exposes the alias registry.
 *
 * @return the registry map itself (not a copy), so changes made by
 *         {@code addDimValueAlias} are visible through the returned reference
 */
public static Map<Long, List<DictWord>> getDimValueAlias() {
    final Map<Long, List<DictWord>> registry = dimValueAliasMap;
    return registry;
}
/**
 * Registers alias entries for a dimension, skipping entries that are already present.
 *
 * <p>Two entries count as the same when their nature-with-frequency, word and alias all
 * match. Duplicates are rejected both against the entries already stored for
 * {@code dimId} and against earlier entries of the same {@code newWords} batch.
 *
 * <p>NOTE(review): the registry is a plain {@code HashMap} holding {@code ArrayList}s and
 * this method takes no lock; confirm whether dictionary loading can run concurrently with
 * readers of {@code getDimValueAlias()}.
 *
 * @param dimId id of the dimension the entries belong to
 * @param newWords candidate entries; {@code null} is treated as an empty batch and
 *        {@code null} elements are ignored
 * @return the list stored for {@code dimId} after the merge (the stored list, not a copy)
 */
public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) {
    // An entry for dimId is created even when nothing is added, as before.
    List<DictWord> dimValueAlias =
            dimValueAliasMap.computeIfAbsent(dimId, id -> new ArrayList<>());
    if (newWords == null || newWords.isEmpty()) {
        return dimValueAlias;
    }

    Set<String> knownKeys = dimValueAlias.stream()
            .map(word -> aliasKey(word))
            .collect(Collectors.toSet());
    for (DictWord dictWord : newWords) {
        // Set.add reports whether the key was new, which also records it so that a
        // repeated entry later in this same batch is rejected.
        if (dictWord != null && knownKeys.add(aliasKey(dictWord))) {
            dimValueAlias.add(dictWord);
        }
    }
    return dimValueAlias;
}

/** Builds the identity key (nature-with-frequency, word, alias) used for de-duplication. */
private static String aliasKey(DictWord word) {
    return String.format("%s_%s_%s", word.getNatureWithFrequency(), word.getWord(),
            word.getAlias());
}
public void updateSemanticKnowledge(List<DictWord> natures) {
@@ -41,6 +69,11 @@ public class KnowledgeBaseService {
}
// 2. update online knowledge
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
for (Long dimId : dimValueAliasMap.keySet()) {
natures.addAll(dimValueAliasMap.get(dimId));
}
}
updateOnlineKnowledge(natures);
}

View File

@@ -12,6 +12,8 @@ import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.LexiconUtility;
import com.hankcs.hanlp.utility.TextUtility;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import java.io.BufferedOutputStream;
@@ -103,7 +105,22 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
String word = getWordBySpace(param[0]);
if (isLetters) {
original = word;
word = word.toLowerCase();
// word = word.toLowerCase();
// Register the lowercase form as an alias
if (!original.equals(word.toLowerCase())) {
DictWord dictWord = new DictWord();
String nature = param[1];
dictWord.setNatureWithFrequency(
String.format("%s " + Constants.DEFAULT_FREQUENCY, nature));
dictWord.setWord(word);
dictWord.setAlias(word.toLowerCase());
String[] split = nature.split(DictWordType.NATURE_SPILT);
if (split.length >= 2) {
Long dimId = Long.parseLong(
nature.split(DictWordType.NATURE_SPILT)[split.length - 1]);
KnowledgeBaseService.addDimValueAlias(dimId, Arrays.asList(dictWord));
}
}
}
if (natureCount == 0) {
attribute = new CoreDictionary.Attribute(defaultNature);

View File

@@ -8,12 +8,15 @@ import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.chat.ChatQueryContext;
import com.tencent.supersonic.headless.chat.knowledge.DatabaseMapResult;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.chat.knowledge.KnowledgeBaseService;
import com.tencent.supersonic.headless.chat.knowledge.builder.BaseWordBuilder;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
import com.tencent.supersonic.headless.chat.utils.EditDistanceUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
import java.util.HashSet;
@@ -83,12 +86,32 @@ public class KeywordMapper extends BaseMapper {
.element(element).frequency(frequency).word(hanlpMapResult.getName())
.similarity(hanlpMapResult.getSimilarity())
.detectWord(hanlpMapResult.getDetectWord()).build();
// doDimValueAliasLogic replaces a dimension-value alias with the real dimension value
doDimValueAliasLogic(schemaElementMatch);
addToSchemaMap(chatQueryContext.getMapInfo(), dataSetId, schemaElementMatch);
}
}
}
/**
 * Rewrites the matched word of a VALUE-type element when that word is a registered alias.
 *
 * <p>The element id is used as the key into {@code KnowledgeBaseService.getDimValueAlias()}.
 * If an entry stored under that key has an alias equal to the match's word, the match's
 * word is replaced with that entry's word. When several entries share the alias, the last
 * one in the list wins.
 *
 * @param schemaElementMatch the match to adjust in place
 */
private void doDimValueAliasLogic(SchemaElementMatch schemaElementMatch) {
    SchemaElement element = schemaElementMatch.getElement();
    if (!SchemaElementType.VALUE.equals(element.getType())) {
        return;
    }

    Long dimId = element.getId();
    String matchedText = schemaElementMatch.getWord();
    Map<Long, List<DictWord>> aliasRegistry = KnowledgeBaseService.getDimValueAlias();
    if (dimId == null || !StringUtils.isNotEmpty(matchedText)
            || !aliasRegistry.containsKey(dimId)) {
        return;
    }

    // Scan the whole list and keep overwriting so that the last entry with a matching
    // alias is the one applied.
    DictWord resolved = null;
    for (DictWord candidate : aliasRegistry.get(dimId)) {
        if (matchedText.equals(candidate.getAlias())) {
            resolved = candidate;
        }
    }
    if (resolved != null) {
        schemaElementMatch.setWord(resolved.getWord());
    }
}
private void convertMapResultToMapInfo(ChatQueryContext chatQueryContext,
List<DatabaseMapResult> mapResults) {
for (DatabaseMapResult match : mapResults) {

View File

@@ -118,6 +118,7 @@ public class DictRepositoryImpl implements DictRepository {
wrapper.lambda().and(qw -> qw.like(DictTaskDO::getName, key).or()
.like(DictTaskDO::getDescription, key).or().like(DictTaskDO::getConfig, key));
}
wrapper.lambda().orderByDesc(DictTaskDO::getCreatedAt);
return dictTaskMapper.selectList(wrapper);
}