mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-13 04:57:28 +00:00
(improvement)(chat) support remove dictionary and rename match strategy (#332)
This commit is contained in:
@@ -11,6 +11,7 @@ import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
@@ -114,7 +115,7 @@ public class SearchService {
|
||||
}
|
||||
|
||||
public static void put(String key, CoreDictionary.Attribute attribute) {
|
||||
trie.put(key, Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList()));
|
||||
trie.put(key, getValue(attribute.nature));
|
||||
}
|
||||
|
||||
|
||||
@@ -138,9 +139,23 @@ public class SearchService {
|
||||
}
|
||||
|
||||
public static void putSuffix(String key, CoreDictionary.Attribute attribute) {
|
||||
suffixTrie.put(key,
|
||||
Arrays.stream(attribute.nature).map(entry -> entry.toString()).collect(Collectors.toList()));
|
||||
Nature[] nature = attribute.nature;
|
||||
suffixTrie.put(key, getValue(nature));
|
||||
}
|
||||
|
||||
private static List<String> getValue(Nature[] nature) {
|
||||
return Arrays.stream(nature).map(entry -> entry.toString()).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static void remove(DictWord dictWord, Nature[] natures) {
|
||||
trie.remove(dictWord.getWord());
|
||||
if (Objects.nonNull(natures) && natures.length > 0) {
|
||||
trie.put(dictWord.getWord(), getValue(natures));
|
||||
}
|
||||
if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature()
|
||||
.contains(DictWordType.DIMENSION.getType())) {
|
||||
suffixTrie.remove(dictWord.getWord());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.tencent.supersonic.knowledge.utils;
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
|
||||
import com.hankcs.hanlp.HanLP;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import com.hankcs.hanlp.seg.Segment;
|
||||
@@ -16,6 +17,7 @@ import com.tencent.supersonic.knowledge.service.SearchService;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -171,10 +173,12 @@ public class HanlpHelper {
|
||||
log.info("get attribute:{}", attribute);
|
||||
getDynamicCustomDictionary().remove(dictWord.getWord());
|
||||
StringBuilder sb = new StringBuilder();
|
||||
List<Nature> natureList = new ArrayList<>();
|
||||
for (int i = 0; i < attribute.nature.length; i++) {
|
||||
if (!attribute.nature[i].toString().equals(dictWord.getNature())) {
|
||||
sb.append(attribute.nature[i].toString() + " ");
|
||||
sb.append(attribute.frequency[i] + " ");
|
||||
natureList.add((attribute.nature[i]));
|
||||
}
|
||||
}
|
||||
String natureWithFrequency = sb.toString();
|
||||
@@ -183,6 +187,7 @@ public class HanlpHelper {
|
||||
if (StringUtils.isNotBlank(natureWithFrequency)) {
|
||||
getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency.substring(0, len - 1));
|
||||
}
|
||||
SearchService.remove(dictWord, natureList.toArray(new Nature[0]));
|
||||
}
|
||||
|
||||
public static <T extends MapResult> void transLetterOriginal(List<T> mapResults) {
|
||||
|
||||
Reference in New Issue
Block a user