From 6e3f87101594e45cce81ee5cdffa33a02048fa39 Mon Sep 17 00:00:00 2001 From: lexluo09 <39718951+lexluo09@users.noreply.github.com> Date: Mon, 6 Nov 2023 21:03:38 +0800 Subject: [PATCH] (improvement)(chat) support remove dictionary and rename match strategy (#332) --- .../chat/mapper/FuzzyNameMapper.java | 4 ++-- ...ategy.java => FuzzyNameMatchStrategy.java} | 5 ++--- .../chat/mapper/HanlpDictMapper.java | 2 +- ...ategy.java => HanlpDictMatchStrategy.java} | 6 +++--- ...t.java => HanlpDictMatchStrategyTest.java} | 2 +- .../knowledge/service/SearchService.java | 21 ++++++++++++++++--- .../knowledge/utils/HanlpHelper.java | 5 +++++ 7 files changed, 32 insertions(+), 13 deletions(-) rename chat/core/src/main/java/com/tencent/supersonic/chat/mapper/{FuzzyMatchStrategy.java => FuzzyNameMatchStrategy.java} (97%) rename chat/core/src/main/java/com/tencent/supersonic/chat/mapper/{HanlpMatchStrategy.java => HanlpDictMatchStrategy.java} (95%) rename chat/core/src/test/java/com/tencent/supersonic/chat/mapper/match/{HanlpMatchStrategyTest.java => HanlpDictMatchStrategyTest.java} (82%) diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMapper.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMapper.java index a6fb1e85c..399a88ae0 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMapper.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMapper.java @@ -27,11 +27,11 @@ public class FuzzyNameMapper extends BaseMapper { List terms = HanlpHelper.getTerms(queryContext.getRequest().getQueryText()); - FuzzyMatchStrategy fuzzyMatchStrategy = ContextUtils.getBean(FuzzyMatchStrategy.class); + FuzzyNameMatchStrategy fuzzyNameMatchStrategy = ContextUtils.getBean(FuzzyNameMatchStrategy.class); MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class); - List matches = fuzzyMatchStrategy.getMatches(queryContext, terms); + List matches = fuzzyNameMatchStrategy.getMatches(queryContext, terms); for (FuzzyResult match : matches) { SchemaElement schemaElement = match.getSchemaElement(); diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyMatchStrategy.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMatchStrategy.java similarity index 97% rename from chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyMatchStrategy.java rename to chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMatchStrategy.java index 80f3c9637..9a1619418 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyMatchStrategy.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMatchStrategy.java @@ -22,11 +22,11 @@ import org.springframework.stereotype.Service; import org.springframework.util.CollectionUtils; /** - * Fuzzy Match Strategy + * Fuzzy Name Match Strategy */ @Service @Slf4j -public class FuzzyMatchStrategy extends BaseMatchStrategy { +public class FuzzyNameMatchStrategy extends BaseMatchStrategy { @Autowired private OptimizationConfig optimizationConfig; @@ -59,7 +59,6 @@ public class FuzzyMatchStrategy extends BaseMatchStrategy { public void detectByStep(QueryContext queryContext, Set existResults, Set detectModelIds, Integer startIndex, Integer index, int offset) { String detectSegment = queryContext.getRequest().getQueryText().substring(startIndex, index); - // step1. build query params if (StringUtils.isBlank(detectSegment)) { return; } diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMapper.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMapper.java index 173227635..3054b90e2 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMapper.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMapper.java @@ -30,7 +30,7 @@ public class HanlpDictMapper extends BaseMapper { String queryText = queryContext.getRequest().getQueryText(); List terms = HanlpHelper.getTerms(queryText); - HanlpMatchStrategy matchStrategy = ContextUtils.getBean(HanlpMatchStrategy.class); + HanlpDictMatchStrategy matchStrategy = ContextUtils.getBean(HanlpDictMatchStrategy.class); List matches = matchStrategy.getMatches(queryContext, terms); diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpMatchStrategy.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMatchStrategy.java similarity index 95% rename from chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpMatchStrategy.java rename to chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMatchStrategy.java index 0d8136901..61e6a4e7b 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpMatchStrategy.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMatchStrategy.java @@ -25,7 +25,7 @@ import org.springframework.stereotype.Service; */ @Service @Slf4j -public class HanlpMatchStrategy extends BaseMatchStrategy { +public class HanlpDictMatchStrategy extends BaseMatchStrategy { @Autowired private MapperHelper mapperHelper; @@ -71,8 +71,8 @@ public class HanlpMatchStrategy extends BaseMatchStrategy { detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new)); // step2. suffix search LinkedHashSet suffixHanlpMapResults = SearchService.suffixSearch(detectSegment, - oneDetectionMaxSize, - agentId, detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new)); + oneDetectionMaxSize, agentId, detectModelIds).stream() + .collect(Collectors.toCollection(LinkedHashSet::new)); hanlpMapResults.addAll(suffixHanlpMapResults); diff --git a/chat/core/src/test/java/com/tencent/supersonic/chat/mapper/match/HanlpMatchStrategyTest.java b/chat/core/src/test/java/com/tencent/supersonic/chat/mapper/match/HanlpDictMatchStrategyTest.java similarity index 82% rename from chat/core/src/test/java/com/tencent/supersonic/chat/mapper/match/HanlpMatchStrategyTest.java rename to chat/core/src/test/java/com/tencent/supersonic/chat/mapper/match/HanlpDictMatchStrategyTest.java index 34d913837..6444822c9 100644 --- a/chat/core/src/test/java/com/tencent/supersonic/chat/mapper/match/HanlpMatchStrategyTest.java +++ b/chat/core/src/test/java/com/tencent/supersonic/chat/mapper/match/HanlpDictMatchStrategyTest.java @@ -6,7 +6,7 @@ import org.junit.jupiter.api.Test; /** * MatchStrategyImplTest */ -class HanlpMatchStrategyTest extends ContextTest { +class HanlpDictMatchStrategyTest extends ContextTest { @Test void match() { diff --git a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/service/SearchService.java b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/service/SearchService.java index 7db89361a..0e0333cd0 100644 --- a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/service/SearchService.java +++ b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/service/SearchService.java @@ -11,6 +11,7 @@ import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -114,7 +115,7 @@ public class SearchService { } public static void put(String key, CoreDictionary.Attribute attribute) { - trie.put(key, Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList())); + trie.put(key, getValue(attribute.nature)); } @@ -138,9 +139,23 @@ public class SearchService { } public static void putSuffix(String key, CoreDictionary.Attribute attribute) { - suffixTrie.put(key, - Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList())); + Nature[] nature = attribute.nature; + suffixTrie.put(key, getValue(nature)); } + private static List getValue(Nature[] nature) { + return Arrays.stream(nature).map(entry -> entry.toString()).collect(Collectors.toList()); + } + + public static void remove(DictWord dictWord, Nature[] natures) { + trie.remove(dictWord.getWord()); + if (Objects.nonNull(natures) && natures.length > 0) { + trie.put(dictWord.getWord(), getValue(natures)); + } + if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature() + .contains(DictWordType.DIMENSION.getType())) { + suffixTrie.remove(dictWord.getWord()); + } + } } diff --git a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/utils/HanlpHelper.java b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/utils/HanlpHelper.java index 54702adec..84f1a970d 100644 --- a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/utils/HanlpHelper.java +++ b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/utils/HanlpHelper.java @@ -3,6 +3,7 @@ package com.tencent.supersonic.knowledge.utils; import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath; import com.hankcs.hanlp.HanLP; +import com.hankcs.hanlp.corpus.tag.Nature; import com.hankcs.hanlp.dictionary.CoreDictionary; import com.hankcs.hanlp.dictionary.DynamicCustomDictionary; import com.hankcs.hanlp.seg.Segment; @@ -16,6 +17,7 @@ import com.tencent.supersonic.knowledge.service.SearchService; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -171,10 +173,12 @@ public class HanlpHelper { log.info("get attribute:{}", attribute); getDynamicCustomDictionary().remove(dictWord.getWord()); StringBuilder sb = new StringBuilder(); + List natureList = new ArrayList<>(); for (int i = 0; i < attribute.nature.length; i++) { if (!attribute.nature[i].toString().equals(dictWord.getNature())) { sb.append(attribute.nature[i].toString() + " "); sb.append(attribute.frequency[i] + " "); + natureList.add((attribute.nature[i])); } } String natureWithFrequency = sb.toString(); @@ -183,6 +187,7 @@ public class HanlpHelper { if (StringUtils.isNotBlank(natureWithFrequency)) { getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency.substring(0, len - 1)); } + SearchService.remove(dictWord, natureList.toArray(new Nature[0])); } public static void transLetterOriginal(List mapResults) {