(improvement)(chat) Support semantic understanding, optimize the overall code of the mapper. (#321)

This commit is contained in:
lexluo09
2023-11-05 13:14:07 +08:00
committed by GitHub
parent 2fe56e7462
commit 910384d17f
25 changed files with 716 additions and 375 deletions

View File

@@ -0,0 +1,34 @@
package com.tencent.supersonic.knowledge.dictionary;
import com.google.common.base.Objects;
import java.util.Map;
import lombok.Data;
import lombok.ToString;
/**
 * A {@link MapResult} that additionally carries an {@code id}, a {@code distance}
 * value and a string-to-string {@code metadata} map.
 *
 * <p>NOTE(review): presumably produced by an embedding-based lookup, with
 * {@code distance} being the distance reported by that lookup — confirm against
 * the callers.
 *
 * <p>Two instances are considered equal when they are of the exact same class
 * and their {@code id} values are equal; {@code distance}, {@code metadata} and
 * the inherited fields do not take part in {@link #equals(Object)} or
 * {@link #hashCode()}.
 */
@Data
@ToString
public class EmbeddingResult extends MapResult {

    private String id;

    private double distance;

    private Map<String, String> metadata;

    /**
     * Compares by exact runtime class and {@code id} only.
     *
     * @param o the object to compare with, may be {@code null}
     * @return {@code true} if {@code o} is this instance, or an instance of the
     *         same class whose {@code id} equals this one's
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        boolean sameClass = o != null && o.getClass() == getClass();
        if (!sameClass) {
            return false;
        }
        EmbeddingResult other = (EmbeddingResult) o;
        return Objects.equal(this.id, other.id);
    }

    /**
     * Hash derived from {@code id} only, matching {@link #equals(Object)}.
     *
     * @return the hash code of this result
     */
    @Override
    public int hashCode() {
        return Objects.hashCode(id);
    }
}

View File

@@ -0,0 +1,44 @@
package com.tencent.supersonic.knowledge.dictionary;
import com.google.common.base.Objects;
import java.util.List;
import lombok.Data;
import lombok.ToString;
/**
 * A {@link MapResult} that additionally carries a list of {@code natures}, an
 * {@code offset} (initially {@code 0}) and a {@code similarity} value.
 *
 * <p>Two instances are considered equal when they are of the exact same class
 * and both their {@code name} and {@code natures} are equal; {@code detectWord},
 * {@code offset} and {@code similarity} do not take part in
 * {@link #equals(Object)} or {@link #hashCode()}.
 */
@Data
@ToString
public class HanlpMapResult extends MapResult {

    private List<String> natures;

    private int offset = 0;

    private double similarity;

    /**
     * Creates a result from a name, its natures and the word that was detected.
     *
     * @param name       value stored in the inherited {@code name} field
     * @param natures    value stored in {@code natures}
     * @param detectWord value stored in the inherited {@code detectWord} field
     */
    public HanlpMapResult(String name, List<String> natures, String detectWord) {
        this.detectWord = detectWord;
        this.natures = natures;
        this.name = name;
    }

    /**
     * Compares by exact runtime class, {@code name} and {@code natures}.
     *
     * @param o the object to compare with, may be {@code null}
     * @return {@code true} if {@code o} is this instance, or an instance of the
     *         same class with equal {@code name} and equal {@code natures}
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        boolean sameClass = o != null && o.getClass() == getClass();
        if (!sameClass) {
            return false;
        }
        HanlpMapResult other = (HanlpMapResult) o;
        if (!Objects.equal(this.name, other.name)) {
            return false;
        }
        return Objects.equal(this.natures, other.natures);
    }

    /**
     * Hash derived from {@code name} and {@code natures}, matching
     * {@link #equals(Object)}.
     *
     * @return the hash code of this result
     */
    @Override
    public int hashCode() {
        return Objects.hashCode(name, natures);
    }

    /**
     * Sets the offset of this result.
     *
     * @param offset the new offset value
     */
    public void setOffset(int offset) {
        this.offset = offset;
    }
}

View File

@@ -1,8 +1,6 @@
package com.tencent.supersonic.knowledge.dictionary;
import com.google.common.base.Objects;
import java.io.Serializable;
import java.util.List;
import lombok.Data;
import lombok.ToString;
@@ -10,43 +8,6 @@ import lombok.ToString;
@ToString
public class MapResult implements Serializable {
private String name;
private List<String> natures;
private int offset = 0;
private double similarity;
private String detectWord;
public MapResult() {
}
public MapResult(String name, List<String> natures, String detectWord) {
this.name = name;
this.natures = natures;
this.detectWord = detectWord;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
MapResult mapResult = (MapResult) o;
return Objects.equal(name, mapResult.name) && Objects.equal(natures, mapResult.natures);
}
@Override
public int hashCode() {
return Objects.hashCode(name, natures);
}
public void setOffset(int offset) {
this.offset = offset;
}
protected String name;
protected String detectWord;
}

View File

@@ -7,7 +7,7 @@ import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.DictionaryAttributeUtil;
import com.tencent.supersonic.knowledge.dictionary.MapResult;
import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@@ -38,17 +38,17 @@ public class SearchService {
* @param key
* @return
*/
public static List<MapResult> prefixSearch(String key, int limit, Integer agentId, Set<Long> detectModelIds) {
public static List<HanlpMapResult> prefixSearch(String key, int limit, Integer agentId, Set<Long> detectModelIds) {
return prefixSearch(key, limit, agentId, trie, detectModelIds);
}
public static List<MapResult> prefixSearch(String key, int limit, Integer agentId, BinTrie<List<String>> binTrie,
Set<Long> detectModelIds) {
public static List<HanlpMapResult> prefixSearch(String key, int limit, Integer agentId,
BinTrie<List<String>> binTrie, Set<Long> detectModelIds) {
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, agentId, detectModelIds);
return result.stream().map(
entry -> {
String name = entry.getKey().replace("#", " ");
return new MapResult(name, entry.getValue(), key);
return new HanlpMapResult(name, entry.getValue(), key);
}
).sorted((a, b) -> -(b.getName().length() - a.getName().length()))
.limit(SEARCH_SIZE)
@@ -60,13 +60,13 @@ public class SearchService {
* @param key
* @return
*/
public static List<MapResult> suffixSearch(String key, int limit, Integer agentId, Set<Long> detectModelIds) {
public static List<HanlpMapResult> suffixSearch(String key, int limit, Integer agentId, Set<Long> detectModelIds) {
String reverseDetectSegment = StringUtils.reverse(key);
return suffixSearch(reverseDetectSegment, limit, agentId, suffixTrie, detectModelIds);
}
public static List<MapResult> suffixSearch(String key, int limit, Integer agentId, BinTrie<List<String>> binTrie,
Set<Long> detectModelIds) {
public static List<HanlpMapResult> suffixSearch(String key, int limit, Integer agentId,
BinTrie<List<String>> binTrie, Set<Long> detectModelIds) {
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, agentId, detectModelIds);
return result.stream().map(
entry -> {
@@ -75,7 +75,7 @@ public class SearchService {
.map(nature -> nature.replaceAll(DictWordType.SUFFIX.getType(), ""))
.collect(Collectors.toList());
name = StringUtils.reverse(name);
return new MapResult(name, natures, key);
return new HanlpMapResult(name, natures, key);
}
).sorted((a, b) -> -(b.getName().length() - a.getName().length()))
.limit(SEARCH_SIZE)

View File

@@ -9,17 +9,16 @@ import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.HadoopFileIOAdapter;
import com.tencent.supersonic.knowledge.dictionary.MapResult;
import com.tencent.supersonic.knowledge.dictionary.MultiCustomDictionary;
import com.tencent.supersonic.knowledge.service.SearchService;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import com.tencent.supersonic.knowledge.dictionary.MapResult;
import com.tencent.supersonic.knowledge.dictionary.HadoopFileIOAdapter;
import com.tencent.supersonic.knowledge.service.SearchService;
import com.tencent.supersonic.knowledge.dictionary.MultiCustomDictionary;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
@@ -186,11 +185,11 @@ public class HanlpHelper {
}
}
public static void transLetterOriginal(List<MapResult> mapResults) {
public static <T extends MapResult> void transLetterOriginal(List<T> mapResults) {
if (CollectionUtils.isEmpty(mapResults)) {
return;
}
for (MapResult mapResult : mapResults) {
for (T mapResult : mapResults) {
if (MultiCustomDictionary.isLowerLetter(mapResult.getName())) {
if (CustomDictionary.contains(mapResult.getName())) {
CoreDictionary.Attribute attribute = CustomDictionary.get(mapResult.getName());