From efac1efdb04f6a172e5d562901719b17cc04f591 Mon Sep 17 00:00:00 2001 From: daikon12 <1059907724@qq.com> Date: Sun, 1 Dec 2024 23:48:51 +0800 Subject: [PATCH] [improvement](Dict) add dimValueAliasMap info for KnowledgeBaseService --- .../headless/chat/knowledge/DictWord.java | 1 + .../chat/knowledge/KnowledgeBaseService.java | 33 +++++++++++++++++++ .../chat/knowledge/MultiCustomDictionary.java | 19 ++++++++++- .../headless/chat/mapper/KeywordMapper.java | 25 +++++++++++++- .../repository/impl/DictRepositoryImpl.java | 1 + 5 files changed, 77 insertions(+), 2 deletions(-) diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/DictWord.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/DictWord.java index 97bef6f69..698ebafcf 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/DictWord.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/DictWord.java @@ -19,6 +19,7 @@ public class DictWord { private String word; private String nature; private String natureWithFrequency; + private String alias; @Override public boolean equals(Object o) { diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java index a6dabb953..7cf018360 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java @@ -1,11 +1,14 @@ package com.tencent.supersonic.headless.chat.knowledge; +import com.baomidou.mybatisplus.core.toolkit.CollectionUtils; import com.tencent.supersonic.common.pojo.enums.DictWordType; import com.tencent.supersonic.headless.api.pojo.response.S2Term; import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -14,6 +17,31 @@ import java.util.stream.Collectors; @Service @Slf4j public class KnowledgeBaseService { + private static volatile Map> dimValueAliasMap = new HashMap<>(); + + public static Map> getDimValueAlias() { + return dimValueAliasMap; + } + + public static List addDimValueAlias(Long dimId, List newWords) { + List dimValueAlias = + dimValueAliasMap.containsKey(dimId) ? dimValueAliasMap.get(dimId) + : new ArrayList<>(); + Set wordSet = + dimValueAlias + .stream().map(word -> String.format("%s_%s_%s", + word.getNatureWithFrequency(), word.getWord(), word.getAlias())) + .collect(Collectors.toSet()); + for (DictWord dictWord : newWords) { + String key = String.format("%s_%s_%s", dictWord.getNatureWithFrequency(), + dictWord.getWord(), dictWord.getAlias()); + if (!wordSet.contains(key)) { + dimValueAlias.add(dictWord); + } + } + dimValueAliasMap.put(dimId, dimValueAlias); + return dimValueAlias; + } public void updateSemanticKnowledge(List natures) { @@ -41,6 +69,11 @@ public class KnowledgeBaseService { } // 2. update online knowledge + if (CollectionUtils.isNotEmpty(dimValueAliasMap)) { + for (Long dimId : dimValueAliasMap.keySet()) { + natures.addAll(dimValueAliasMap.get(dimId)); + } + } updateOnlineKnowledge(natures); } diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/MultiCustomDictionary.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/MultiCustomDictionary.java index c7511326f..79cefa92f 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/MultiCustomDictionary.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/MultiCustomDictionary.java @@ -12,6 +12,8 @@ import com.hankcs.hanlp.dictionary.other.CharTable; import com.hankcs.hanlp.seg.common.Term; import com.hankcs.hanlp.utility.LexiconUtility; import com.hankcs.hanlp.utility.TextUtility; +import com.tencent.supersonic.common.pojo.Constants; +import com.tencent.supersonic.common.pojo.enums.DictWordType; import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper; import java.io.BufferedOutputStream; @@ -103,7 +105,22 @@ public class MultiCustomDictionary extends DynamicCustomDictionary { String word = getWordBySpace(param[0]); if (isLetters) { original = word; - word = word.toLowerCase(); + // word = word.toLowerCase(); + // 加入小写别名 + if (!original.equals(word.toLowerCase())) { + DictWord dictWord = new DictWord(); + String nature = param[1]; + dictWord.setNatureWithFrequency( + String.format("%s " + Constants.DEFAULT_FREQUENCY, nature)); + dictWord.setWord(word); + dictWord.setAlias(word.toLowerCase()); + String[] split = nature.split(DictWordType.NATURE_SPILT); + if (split.length >= 2) { + Long dimId = Long.parseLong( + nature.split(DictWordType.NATURE_SPILT)[split.length - 1]); + KnowledgeBaseService.addDimValueAlias(dimId, Arrays.asList(dictWord)); + } + } } if (natureCount == 0) { attribute = new CoreDictionary.Attribute(defaultNature); diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/KeywordMapper.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/KeywordMapper.java index cfce186d8..33450eda1 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/KeywordMapper.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/KeywordMapper.java @@ -8,12 +8,15 @@ import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo; import com.tencent.supersonic.headless.api.pojo.response.S2Term; import com.tencent.supersonic.headless.chat.ChatQueryContext; import com.tencent.supersonic.headless.chat.knowledge.DatabaseMapResult; +import com.tencent.supersonic.headless.chat.knowledge.DictWord; import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult; +import com.tencent.supersonic.headless.chat.knowledge.KnowledgeBaseService; import com.tencent.supersonic.headless.chat.knowledge.builder.BaseWordBuilder; import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper; import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper; import com.tencent.supersonic.headless.chat.utils.EditDistanceUtils; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; import org.springframework.util.CollectionUtils; import java.util.HashSet; @@ -83,12 +86,32 @@ public class KeywordMapper extends BaseMapper { .element(element).frequency(frequency).word(hanlpMapResult.getName()) .similarity(hanlpMapResult.getSimilarity()) .detectWord(hanlpMapResult.getDetectWord()).build(); - + // doDimValueAliasLogic 将维度值别名进行替换成真实维度值 + doDimValueAliasLogic(schemaElementMatch); addToSchemaMap(chatQueryContext.getMapInfo(), dataSetId, schemaElementMatch); } } } + private void doDimValueAliasLogic(SchemaElementMatch schemaElementMatch) { + SchemaElement element = schemaElementMatch.getElement(); + if (SchemaElementType.VALUE.equals(element.getType())) { + Long dimId = element.getId(); + String word = schemaElementMatch.getWord(); + Map> dimValueAlias = KnowledgeBaseService.getDimValueAlias(); + if (Objects.nonNull(dimId) && StringUtils.isNotEmpty(word) + && dimValueAlias.containsKey(dimId)) { + Map aliasAndDictMap = dimValueAlias.get(dimId).stream() + .collect(Collectors.toMap(dictWord -> dictWord.getAlias(), + dictWord -> dictWord, (v1, v2) -> v2)); + if (aliasAndDictMap.containsKey(word)) { + String wordTech = aliasAndDictMap.get(word).getWord(); + schemaElementMatch.setWord(wordTech); + } + } + } + } + private void convertMapResultToMapInfo(ChatQueryContext chatQueryContext, List mapResults) { for (DatabaseMapResult match : mapResults) { diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/repository/impl/DictRepositoryImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/repository/impl/DictRepositoryImpl.java index 55ffe1d11..77ea46813 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/repository/impl/DictRepositoryImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/repository/impl/DictRepositoryImpl.java @@ -118,6 +118,7 @@ public class DictRepositoryImpl implements DictRepository { wrapper.lambda().and(qw -> qw.like(DictTaskDO::getName, key).or() .like(DictTaskDO::getDescription, key).or().like(DictTaskDO::getConfig, key)); } + wrapper.lambda().orderByDesc(DictTaskDO::getCreatedAt); return dictTaskMapper.selectList(wrapper); }