(improvement)(chat) support remove dictionary and rename match strategy (#332)

This commit is contained in:
lexluo09
2023-11-06 21:03:38 +08:00
committed by GitHub
parent 6c9983164e
commit 6e3f871015
7 changed files with 32 additions and 13 deletions

View File

@@ -27,11 +27,11 @@ public class FuzzyNameMapper extends BaseMapper {
List<Term> terms = HanlpHelper.getTerms(queryContext.getRequest().getQueryText());
FuzzyMatchStrategy fuzzyMatchStrategy = ContextUtils.getBean(FuzzyMatchStrategy.class);
FuzzyNameMatchStrategy fuzzyNameMatchStrategy = ContextUtils.getBean(FuzzyNameMatchStrategy.class);
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
List<FuzzyResult> matches = fuzzyMatchStrategy.getMatches(queryContext, terms);
List<FuzzyResult> matches = fuzzyNameMatchStrategy.getMatches(queryContext, terms);
for (FuzzyResult match : matches) {
SchemaElement schemaElement = match.getSchemaElement();

View File

@@ -22,11 +22,11 @@ import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
/**
* Fuzzy Match Strategy
* Fuzzy Name Match Strategy
*/
@Service
@Slf4j
public class FuzzyMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
public class FuzzyNameMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
@Autowired
private OptimizationConfig optimizationConfig;
@@ -59,7 +59,6 @@ public class FuzzyMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
public void detectByStep(QueryContext queryContext, Set<FuzzyResult> existResults, Set<Long> detectModelIds,
Integer startIndex, Integer index, int offset) {
String detectSegment = queryContext.getRequest().getQueryText().substring(startIndex, index);
// step1. build query params
if (StringUtils.isBlank(detectSegment)) {
return;
}

View File

@@ -30,7 +30,7 @@ public class HanlpDictMapper extends BaseMapper {
String queryText = queryContext.getRequest().getQueryText();
List<Term> terms = HanlpHelper.getTerms(queryText);
HanlpMatchStrategy matchStrategy = ContextUtils.getBean(HanlpMatchStrategy.class);
HanlpDictMatchStrategy matchStrategy = ContextUtils.getBean(HanlpDictMatchStrategy.class);
List<HanlpMapResult> matches = matchStrategy.getMatches(queryContext, terms);

View File

@@ -25,7 +25,7 @@ import org.springframework.stereotype.Service;
*/
@Service
@Slf4j
public class HanlpMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
@Autowired
private MapperHelper mapperHelper;
@@ -71,8 +71,8 @@ public class HanlpMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
// step2. suffix search
LinkedHashSet<HanlpMapResult> suffixHanlpMapResults = SearchService.suffixSearch(detectSegment,
oneDetectionMaxSize,
agentId, detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
oneDetectionMaxSize, agentId, detectModelIds).stream()
.collect(Collectors.toCollection(LinkedHashSet::new));
hanlpMapResults.addAll(suffixHanlpMapResults);

View File

@@ -6,7 +6,7 @@ import org.junit.jupiter.api.Test;
/**
* HanlpDictMatchStrategyTest
*/
class HanlpMatchStrategyTest extends ContextTest {
class HanlpDictMatchStrategyTest extends ContextTest {
@Test
void match() {

View File

@@ -11,6 +11,7 @@ import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
@@ -114,7 +115,7 @@ public class SearchService {
}
/**
 * Registers {@code key} in {@code trie}, storing the string form of each nature
 * carried by {@code attribute}.
 *
 * @param key word to register
 * @param attribute dictionary attribute whose {@code nature} array supplies the stored values
 */
public static void put(String key, CoreDictionary.Attribute attribute) {
// NOTE(review): the two statements below look like the pre-change line and its
// replacement from the diff view; both store attribute.nature converted to strings.
trie.put(key, Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList()));
trie.put(key, getValue(attribute.nature));
}
@@ -138,9 +139,23 @@ public class SearchService {
}
/**
 * Registers {@code key} in {@code suffixTrie}, storing the string form of each nature
 * carried by {@code attribute}.
 *
 * @param key word to register in the suffix trie
 * @param attribute dictionary attribute whose {@code nature} array supplies the stored values
 */
public static void putSuffix(String key, CoreDictionary.Attribute attribute) {
// NOTE(review): the first put below looks like the pre-change statement from the
// diff view; the lines after it are its replacement, which delegates to getValue.
suffixTrie.put(key,
Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList()));
Nature[] nature = attribute.nature;
suffixTrie.put(key, getValue(nature));
}
/**
 * Converts an array of natures into the list of their string representations,
 * preserving array order.
 *
 * @param nature natures to convert
 * @return list holding {@code toString()} of every element of {@code nature}
 */
private static List<String> getValue(Nature[] nature) {
    return Arrays.stream(nature)
            .map(Nature::toString)
            .collect(Collectors.toList());
}
/**
 * Removes a dictionary word from the in-memory search tries.
 *
 * <p>The word's entry in {@code trie} is dropped first. When {@code natures} is non-empty the
 * word is then put back with the string forms of those natures. The word is additionally
 * removed from {@code suffixTrie} when its nature contains the metric or dimension type.
 *
 * @param dictWord word being removed; if its nature is {@code null} the suffix trie is left untouched
 * @param natures natures to re-register for the word; may be {@code null} or empty
 */
public static void remove(DictWord dictWord, Nature[] natures) {
    trie.remove(dictWord.getWord());
    if (Objects.nonNull(natures) && natures.length > 0) {
        trie.put(dictWord.getWord(), getValue(natures));
    }
    // A word without a nature cannot be classified as metric or dimension; bail out
    // instead of throwing a NullPointerException after trie has already been updated.
    if (Objects.isNull(dictWord.getNature())) {
        return;
    }
    if (dictWord.getNature().contains(DictWordType.METRIC.getType())
            || dictWord.getNature().contains(DictWordType.DIMENSION.getType())) {
        suffixTrie.remove(dictWord.getWord());
    }
}
}

View File

@@ -3,6 +3,7 @@ package com.tencent.supersonic.knowledge.utils;
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.seg.Segment;
@@ -16,6 +17,7 @@ import com.tencent.supersonic.knowledge.service.SearchService;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
@@ -171,10 +173,12 @@ public class HanlpHelper {
log.info("get attribute:{}", attribute);
getDynamicCustomDictionary().remove(dictWord.getWord());
StringBuilder sb = new StringBuilder();
List<Nature> natureList = new ArrayList<>();
for (int i = 0; i < attribute.nature.length; i++) {
if (!attribute.nature[i].toString().equals(dictWord.getNature())) {
sb.append(attribute.nature[i].toString() + " ");
sb.append(attribute.frequency[i] + " ");
natureList.add((attribute.nature[i]));
}
}
String natureWithFrequency = sb.toString();
@@ -183,6 +187,7 @@ public class HanlpHelper {
if (StringUtils.isNotBlank(natureWithFrequency)) {
getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency.substring(0, len - 1));
}
SearchService.remove(dictWord, natureList.toArray(new Nature[0]));
}
public static <T extends MapResult> void transLetterOriginal(List<T> mapResults) {