mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-11 03:58:14 +00:00
(improvement)(chat) support remove dictionary and rename match strategy (#332)
This commit is contained in:
@@ -27,11 +27,11 @@ public class FuzzyNameMapper extends BaseMapper {
|
||||
|
||||
List<Term> terms = HanlpHelper.getTerms(queryContext.getRequest().getQueryText());
|
||||
|
||||
FuzzyMatchStrategy fuzzyMatchStrategy = ContextUtils.getBean(FuzzyMatchStrategy.class);
|
||||
FuzzyNameMatchStrategy fuzzyNameMatchStrategy = ContextUtils.getBean(FuzzyNameMatchStrategy.class);
|
||||
|
||||
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
|
||||
|
||||
List<FuzzyResult> matches = fuzzyMatchStrategy.getMatches(queryContext, terms);
|
||||
List<FuzzyResult> matches = fuzzyNameMatchStrategy.getMatches(queryContext, terms);
|
||||
|
||||
for (FuzzyResult match : matches) {
|
||||
SchemaElement schemaElement = match.getSchemaElement();
|
||||
|
||||
@@ -22,11 +22,11 @@ import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/**
|
||||
* Fuzzy Match Strategy
|
||||
* Fuzzy Name Match Strategy
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class FuzzyMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
|
||||
public class FuzzyNameMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
|
||||
|
||||
@Autowired
|
||||
private OptimizationConfig optimizationConfig;
|
||||
@@ -59,7 +59,6 @@ public class FuzzyMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
|
||||
public void detectByStep(QueryContext queryContext, Set<FuzzyResult> existResults, Set<Long> detectModelIds,
|
||||
Integer startIndex, Integer index, int offset) {
|
||||
String detectSegment = queryContext.getRequest().getQueryText().substring(startIndex, index);
|
||||
// step1. build query params
|
||||
if (StringUtils.isBlank(detectSegment)) {
|
||||
return;
|
||||
}
|
||||
@@ -30,7 +30,7 @@ public class HanlpDictMapper extends BaseMapper {
|
||||
String queryText = queryContext.getRequest().getQueryText();
|
||||
List<Term> terms = HanlpHelper.getTerms(queryText);
|
||||
|
||||
HanlpMatchStrategy matchStrategy = ContextUtils.getBean(HanlpMatchStrategy.class);
|
||||
HanlpDictMatchStrategy matchStrategy = ContextUtils.getBean(HanlpDictMatchStrategy.class);
|
||||
|
||||
List<HanlpMapResult> matches = matchStrategy.getMatches(queryContext, terms);
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ import org.springframework.stereotype.Service;
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class HanlpMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
|
||||
@Autowired
|
||||
private MapperHelper mapperHelper;
|
||||
@@ -71,8 +71,8 @@ public class HanlpMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
// step2. suffix search
|
||||
LinkedHashSet<HanlpMapResult> suffixHanlpMapResults = SearchService.suffixSearch(detectSegment,
|
||||
oneDetectionMaxSize,
|
||||
agentId, detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
oneDetectionMaxSize, agentId, detectModelIds).stream()
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
hanlpMapResults.addAll(suffixHanlpMapResults);
|
||||
|
||||
@@ -6,7 +6,7 @@ import org.junit.jupiter.api.Test;
|
||||
/**
|
||||
* Tests for HanlpDictMatchStrategy (formerly HanlpMatchStrategy)
|
||||
*/
|
||||
class HanlpMatchStrategyTest extends ContextTest {
|
||||
class HanlpDictMatchStrategyTest extends ContextTest {
|
||||
|
||||
@Test
|
||||
void match() {
|
||||
@@ -11,6 +11,7 @@ import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
@@ -114,7 +115,7 @@ public class SearchService {
|
||||
}
|
||||
|
||||
/**
 * Stores {@code key} in the prefix {@code trie}, using the string forms of
 * {@code attribute.nature} as the associated value list.
 *
 * @param key       the word to index
 * @param attribute the dictionary attribute whose {@code nature} array supplies the values
 */
public static void put(String key, CoreDictionary.Attribute attribute) {
|
||||
// NOTE(review): this span is a rendered diff hunk; the next statement appears to be the
// pre-change version (inline stream mapping) — confirm against the repository.
trie.put(key, Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList()));
|
||||
// NOTE(review): appears to be the post-change version; it delegates the same
// nature-to-string conversion to getValue(...).
trie.put(key, getValue(attribute.nature));
|
||||
}
|
||||
|
||||
|
||||
@@ -138,9 +139,23 @@ public class SearchService {
|
||||
}
|
||||
|
||||
/**
 * Stores {@code key} in {@code suffixTrie}, using the string forms of
 * {@code attribute.nature} as the associated value list.
 *
 * @param key       the key to index in the suffix trie
 * @param attribute the dictionary attribute whose {@code nature} array supplies the values
 */
public static void putSuffix(String key, CoreDictionary.Attribute attribute) {
|
||||
// NOTE(review): this span is a rendered diff hunk; the two-line statement below appears
// to be the pre-change version (inline stream mapping) — confirm against the repository.
suffixTrie.put(key,
|
||||
Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList()));
|
||||
// NOTE(review): the following two statements appear to be the post-change version; they
// delegate the same nature-to-string conversion to getValue(...).
Nature[] nature = attribute.nature;
|
||||
suffixTrie.put(key, getValue(nature));
|
||||
}
|
||||
|
||||
private static List<String> getValue(Nature[] nature) {
|
||||
return Arrays.stream(nature).map(entry -> entry.toString()).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static void remove(DictWord dictWord, Nature[] natures) {
|
||||
trie.remove(dictWord.getWord());
|
||||
if (Objects.nonNull(natures) && natures.length > 0) {
|
||||
trie.put(dictWord.getWord(), getValue(natures));
|
||||
}
|
||||
if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature()
|
||||
.contains(DictWordType.DIMENSION.getType())) {
|
||||
suffixTrie.remove(dictWord.getWord());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.tencent.supersonic.knowledge.utils;
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
|
||||
import com.hankcs.hanlp.HanLP;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import com.hankcs.hanlp.seg.Segment;
|
||||
@@ -16,6 +17,7 @@ import com.tencent.supersonic.knowledge.service.SearchService;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -171,10 +173,12 @@ public class HanlpHelper {
|
||||
log.info("get attribute:{}", attribute);
|
||||
getDynamicCustomDictionary().remove(dictWord.getWord());
|
||||
StringBuilder sb = new StringBuilder();
|
||||
List<Nature> natureList = new ArrayList<>();
|
||||
for (int i = 0; i < attribute.nature.length; i++) {
|
||||
if (!attribute.nature[i].toString().equals(dictWord.getNature())) {
|
||||
sb.append(attribute.nature[i].toString() + " ");
|
||||
sb.append(attribute.frequency[i] + " ");
|
||||
natureList.add((attribute.nature[i]));
|
||||
}
|
||||
}
|
||||
String natureWithFrequency = sb.toString();
|
||||
@@ -183,6 +187,7 @@ public class HanlpHelper {
|
||||
if (StringUtils.isNotBlank(natureWithFrequency)) {
|
||||
getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency.substring(0, len - 1));
|
||||
}
|
||||
SearchService.remove(dictWord, natureList.toArray(new Nature[0]));
|
||||
}
|
||||
|
||||
public static <T extends MapResult> void transLetterOriginal(List<T> mapResults) {
|
||||
|
||||
Reference in New Issue
Block a user