mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-15 06:27:21 +00:00
(improvement)(chat) support remove dictionary and rename match strategy (#332)
This commit is contained in:
@@ -27,11 +27,11 @@ public class FuzzyNameMapper extends BaseMapper {
|
|||||||
|
|
||||||
List<Term> terms = HanlpHelper.getTerms(queryContext.getRequest().getQueryText());
|
List<Term> terms = HanlpHelper.getTerms(queryContext.getRequest().getQueryText());
|
||||||
|
|
||||||
FuzzyMatchStrategy fuzzyMatchStrategy = ContextUtils.getBean(FuzzyMatchStrategy.class);
|
FuzzyNameMatchStrategy fuzzyNameMatchStrategy = ContextUtils.getBean(FuzzyNameMatchStrategy.class);
|
||||||
|
|
||||||
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
|
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
|
||||||
|
|
||||||
List<FuzzyResult> matches = fuzzyMatchStrategy.getMatches(queryContext, terms);
|
List<FuzzyResult> matches = fuzzyNameMatchStrategy.getMatches(queryContext, terms);
|
||||||
|
|
||||||
for (FuzzyResult match : matches) {
|
for (FuzzyResult match : matches) {
|
||||||
SchemaElement schemaElement = match.getSchemaElement();
|
SchemaElement schemaElement = match.getSchemaElement();
|
||||||
|
|||||||
@@ -22,11 +22,11 @@ import org.springframework.stereotype.Service;
|
|||||||
import org.springframework.util.CollectionUtils;
|
import org.springframework.util.CollectionUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fuzzy Match Strategy
|
* Fuzzy Name Match Strategy
|
||||||
*/
|
*/
|
||||||
@Service
|
@Service
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class FuzzyMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
|
public class FuzzyNameMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private OptimizationConfig optimizationConfig;
|
private OptimizationConfig optimizationConfig;
|
||||||
@@ -59,7 +59,6 @@ public class FuzzyMatchStrategy extends BaseMatchStrategy<FuzzyResult> {
|
|||||||
public void detectByStep(QueryContext queryContext, Set<FuzzyResult> existResults, Set<Long> detectModelIds,
|
public void detectByStep(QueryContext queryContext, Set<FuzzyResult> existResults, Set<Long> detectModelIds,
|
||||||
Integer startIndex, Integer index, int offset) {
|
Integer startIndex, Integer index, int offset) {
|
||||||
String detectSegment = queryContext.getRequest().getQueryText().substring(startIndex, index);
|
String detectSegment = queryContext.getRequest().getQueryText().substring(startIndex, index);
|
||||||
// step1. build query params
|
|
||||||
if (StringUtils.isBlank(detectSegment)) {
|
if (StringUtils.isBlank(detectSegment)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -30,7 +30,7 @@ public class HanlpDictMapper extends BaseMapper {
|
|||||||
String queryText = queryContext.getRequest().getQueryText();
|
String queryText = queryContext.getRequest().getQueryText();
|
||||||
List<Term> terms = HanlpHelper.getTerms(queryText);
|
List<Term> terms = HanlpHelper.getTerms(queryText);
|
||||||
|
|
||||||
HanlpMatchStrategy matchStrategy = ContextUtils.getBean(HanlpMatchStrategy.class);
|
HanlpDictMatchStrategy matchStrategy = ContextUtils.getBean(HanlpDictMatchStrategy.class);
|
||||||
|
|
||||||
List<HanlpMapResult> matches = matchStrategy.getMatches(queryContext, terms);
|
List<HanlpMapResult> matches = matchStrategy.getMatches(queryContext, terms);
|
||||||
|
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ import org.springframework.stereotype.Service;
|
|||||||
*/
|
*/
|
||||||
@Service
|
@Service
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class HanlpMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private MapperHelper mapperHelper;
|
private MapperHelper mapperHelper;
|
||||||
@@ -71,8 +71,8 @@ public class HanlpMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
|||||||
detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||||
// step2. suffix search
|
// step2. suffix search
|
||||||
LinkedHashSet<HanlpMapResult> suffixHanlpMapResults = SearchService.suffixSearch(detectSegment,
|
LinkedHashSet<HanlpMapResult> suffixHanlpMapResults = SearchService.suffixSearch(detectSegment,
|
||||||
oneDetectionMaxSize,
|
oneDetectionMaxSize, agentId, detectModelIds).stream()
|
||||||
agentId, detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||||
|
|
||||||
hanlpMapResults.addAll(suffixHanlpMapResults);
|
hanlpMapResults.addAll(suffixHanlpMapResults);
|
||||||
|
|
||||||
@@ -6,7 +6,7 @@ import org.junit.jupiter.api.Test;
|
|||||||
/**
|
/**
|
||||||
* MatchStrategyImplTest
|
* MatchStrategyImplTest
|
||||||
*/
|
*/
|
||||||
class HanlpMatchStrategyTest extends ContextTest {
|
class HanlpDictMatchStrategyTest extends ContextTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void match() {
|
void match() {
|
||||||
@@ -11,6 +11,7 @@ import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
@@ -114,7 +115,7 @@ public class SearchService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static void put(String key, CoreDictionary.Attribute attribute) {
|
public static void put(String key, CoreDictionary.Attribute attribute) {
|
||||||
trie.put(key, Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList()));
|
trie.put(key, getValue(attribute.nature));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -138,9 +139,23 @@ public class SearchService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static void putSuffix(String key, CoreDictionary.Attribute attribute) {
|
public static void putSuffix(String key, CoreDictionary.Attribute attribute) {
|
||||||
suffixTrie.put(key,
|
Nature[] nature = attribute.nature;
|
||||||
Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList()));
|
suffixTrie.put(key, getValue(nature));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static List<String> getValue(Nature[] nature) {
|
||||||
|
return Arrays.stream(nature).map(entry -> entry.toString()).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void remove(DictWord dictWord, Nature[] natures) {
|
||||||
|
trie.remove(dictWord.getWord());
|
||||||
|
if (Objects.nonNull(natures) && natures.length > 0) {
|
||||||
|
trie.put(dictWord.getWord(), getValue(natures));
|
||||||
|
}
|
||||||
|
if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature()
|
||||||
|
.contains(DictWordType.DIMENSION.getType())) {
|
||||||
|
suffixTrie.remove(dictWord.getWord());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package com.tencent.supersonic.knowledge.utils;
|
|||||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||||
|
|
||||||
import com.hankcs.hanlp.HanLP;
|
import com.hankcs.hanlp.HanLP;
|
||||||
|
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||||
import com.hankcs.hanlp.seg.Segment;
|
import com.hankcs.hanlp.seg.Segment;
|
||||||
@@ -16,6 +17,7 @@ import com.tencent.supersonic.knowledge.service.SearchService;
|
|||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@@ -171,10 +173,12 @@ public class HanlpHelper {
|
|||||||
log.info("get attribute:{}", attribute);
|
log.info("get attribute:{}", attribute);
|
||||||
getDynamicCustomDictionary().remove(dictWord.getWord());
|
getDynamicCustomDictionary().remove(dictWord.getWord());
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
|
List<Nature> natureList = new ArrayList<>();
|
||||||
for (int i = 0; i < attribute.nature.length; i++) {
|
for (int i = 0; i < attribute.nature.length; i++) {
|
||||||
if (!attribute.nature[i].toString().equals(dictWord.getNature())) {
|
if (!attribute.nature[i].toString().equals(dictWord.getNature())) {
|
||||||
sb.append(attribute.nature[i].toString() + " ");
|
sb.append(attribute.nature[i].toString() + " ");
|
||||||
sb.append(attribute.frequency[i] + " ");
|
sb.append(attribute.frequency[i] + " ");
|
||||||
|
natureList.add((attribute.nature[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
String natureWithFrequency = sb.toString();
|
String natureWithFrequency = sb.toString();
|
||||||
@@ -183,6 +187,7 @@ public class HanlpHelper {
|
|||||||
if (StringUtils.isNotBlank(natureWithFrequency)) {
|
if (StringUtils.isNotBlank(natureWithFrequency)) {
|
||||||
getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency.substring(0, len - 1));
|
getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency.substring(0, len - 1));
|
||||||
}
|
}
|
||||||
|
SearchService.remove(dictWord, natureList.toArray(new Nature[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends MapResult> void transLetterOriginal(List<T> mapResults) {
|
public static <T extends MapResult> void transLetterOriginal(List<T> mapResults) {
|
||||||
|
|||||||
Reference in New Issue
Block a user