(improvement)(chat) support removing dictionary words and rename the match strategy (#332)

This commit is contained in:
lexluo09
2023-11-06 21:03:38 +08:00
committed by GitHub
parent 6c9983164e
commit 6e3f871015
7 changed files with 32 additions and 13 deletions

View File

@@ -11,6 +11,7 @@ import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
@@ -114,7 +115,7 @@ public class SearchService {
}
/**
 * Stores {@code key} in {@code trie}, mapped to the string form of every
 * nature carried by {@code attribute}.
 *
 * @param key       the word to index
 * @param attribute the dictionary attribute whose {@code nature} array is
 *                  converted to strings and stored as the value
 */
public static void put(String key, CoreDictionary.Attribute attribute) {
    // One insertion only: the value used to be built inline and then put a
    // second time through getValue(...), which produced an equivalent list
    // for the same key.
    trie.put(key, getValue(attribute.nature));
}
@@ -138,9 +139,23 @@ public class SearchService {
}
/**
 * Stores {@code key} in {@code suffixTrie}, mapped to the string form of
 * every nature carried by {@code attribute}.
 *
 * @param key       the word to index
 * @param attribute the dictionary attribute whose {@code nature} array is
 *                  converted to strings and stored as the value
 */
public static void putSuffix(String key, CoreDictionary.Attribute attribute) {
    // One insertion only: the same key was previously put twice with an
    // equivalent value (once built inline, once through getValue(...)).
    suffixTrie.put(key, getValue(attribute.nature));
}
/**
 * Converts an array of natures into a list holding each nature's
 * {@code toString()} result, in array order.
 *
 * @param nature the natures to convert
 * @return a list with one string per element of {@code nature}
 */
private static List<String> getValue(Nature[] nature) {
    return Arrays.asList(nature)
            .stream()
            .map(Nature::toString)
            .collect(Collectors.toList());
}
/**
 * Removes a dictionary word from {@code trie} and, for metric or dimension
 * words, from {@code suffixTrie} as well.
 *
 * <p>When {@code natures} is non-empty the word is put back into
 * {@code trie} with those natures as its value, so only the entry's value
 * changes rather than the entry disappearing.
 *
 * @param dictWord the word being removed; its word text and nature are read
 * @param natures  the natures the word should keep in {@code trie}; may be
 *                 {@code null} or empty, in which case nothing is re-added
 */
public static void remove(DictWord dictWord, Nature[] natures) {
    String word = dictWord.getWord();

    trie.remove(word);
    boolean hasRemainingNatures = natures != null && natures.length != 0;
    if (hasRemainingNatures) {
        trie.put(word, getValue(natures));
    }

    // Only metric and dimension words are dropped from the suffix trie.
    if (!dictWord.getNature().contains(DictWordType.METRIC.getType())
            && !dictWord.getNature().contains(DictWordType.DIMENSION.getType())) {
        return;
    }
    suffixTrie.remove(word);
}
}

View File

@@ -3,6 +3,7 @@ package com.tencent.supersonic.knowledge.utils;
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.seg.Segment;
@@ -16,6 +17,7 @@ import com.tencent.supersonic.knowledge.service.SearchService;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
@@ -171,10 +173,12 @@ public class HanlpHelper {
log.info("get attribute:{}", attribute);
getDynamicCustomDictionary().remove(dictWord.getWord());
StringBuilder sb = new StringBuilder();
List<Nature> natureList = new ArrayList<>();
for (int i = 0; i < attribute.nature.length; i++) {
if (!attribute.nature[i].toString().equals(dictWord.getNature())) {
sb.append(attribute.nature[i].toString() + " ");
sb.append(attribute.frequency[i] + " ");
natureList.add((attribute.nature[i]));
}
}
String natureWithFrequency = sb.toString();
@@ -183,6 +187,7 @@ public class HanlpHelper {
if (StringUtils.isNotBlank(natureWithFrequency)) {
getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency.substring(0, len - 1));
}
SearchService.remove(dictWord, natureList.toArray(new Nature[0]));
}
public static <T extends MapResult> void transLetterOriginal(List<T> mapResults) {