mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
[improvement][headless] Remove entities from the dictionary and search interface (#1878)
This commit is contained in:
@@ -14,8 +14,6 @@ public enum DictWordType {
|
||||
|
||||
DATASET("dataSet"),
|
||||
|
||||
ENTITY("entity"),
|
||||
|
||||
NUMBER("m"),
|
||||
|
||||
TAG("tag"),
|
||||
|
||||
@@ -10,10 +10,9 @@ import org.springframework.stereotype.Service;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/** model word nature */
|
||||
@Service
|
||||
@Slf4j
|
||||
public class ModelWordBuilder extends BaseWordWithAliasBuilder {
|
||||
public class DataSetWordBuilder extends BaseWordWithAliasBuilder {
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
@@ -1,37 +0,0 @@
|
||||
package com.tencent.supersonic.headless.chat.knowledge.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class EntityWordBuilder extends BaseWordWithAliasBuilder {
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
if (Objects.isNull(schemaElement)) {
|
||||
return result;
|
||||
}
|
||||
result.add(getOneWordNature(word, schemaElement, false));
|
||||
result.addAll(getOneWordNatureAlias(schemaElement, false));
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
||||
String nature = DictWordType.NATURE_SPILT + schemaElement.getModel()
|
||||
+ DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getType();
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));
|
||||
return dictWord;
|
||||
}
|
||||
}
|
||||
@@ -13,8 +13,7 @@ public class WordBuilderFactory {
|
||||
static {
|
||||
wordNatures.put(DictWordType.DIMENSION, new DimensionWordBuilder());
|
||||
wordNatures.put(DictWordType.METRIC, new MetricWordBuilder());
|
||||
wordNatures.put(DictWordType.DATASET, new ModelWordBuilder());
|
||||
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
|
||||
wordNatures.put(DictWordType.DATASET, new DataSetWordBuilder());
|
||||
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
|
||||
wordNatures.put(DictWordType.TERM, new TermWordBuilder());
|
||||
}
|
||||
|
||||
@@ -18,7 +18,9 @@ import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/** nature parse helper */
|
||||
/**
|
||||
* nature parse helper
|
||||
*/
|
||||
@Slf4j
|
||||
public class NatureHelper {
|
||||
|
||||
@@ -50,10 +52,9 @@ public class NatureHelper {
|
||||
return result;
|
||||
}
|
||||
|
||||
private static boolean isDataSetOrEntity(S2Term term, Integer model) {
|
||||
private static boolean isDataSet(S2Term term, Integer model) {
|
||||
String natureStr = term.nature.toString();
|
||||
return (DictWordType.NATURE_SPILT + model).equals(natureStr)
|
||||
|| natureStr.endsWith(DictWordType.ENTITY.getType());
|
||||
return (DictWordType.NATURE_SPILT + model).equals(natureStr);
|
||||
}
|
||||
|
||||
public static Integer getDataSetByNature(Nature nature) {
|
||||
@@ -119,8 +120,8 @@ public class NatureHelper {
|
||||
}
|
||||
|
||||
private static long getDataSetCount(List<S2Term> terms) {
|
||||
return terms.stream()
|
||||
.filter(term -> isDataSetOrEntity(term, getDataSetByNature(term.nature))).count();
|
||||
return terms.stream().filter(term -> isDataSet(term, getDataSetByNature(term.nature)))
|
||||
.count();
|
||||
}
|
||||
|
||||
private static long getDimensionValueCount(List<S2Term> terms) {
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
package com.tencent.supersonic.headless.chat.mapper;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.chat.ChatQueryContext;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.KnowledgeBaseService;
|
||||
import com.tencent.supersonic.headless.chat.knowledge.SearchService;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
@@ -17,7 +15,6 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* SearchMatchStrategy encapsulates a concrete matching algorithm executed during search process.
|
||||
@@ -66,16 +63,6 @@ public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
knowledgeBaseService.suffixSearch(detectSegment, SEARCH_SIZE,
|
||||
chatQueryContext.getModelIdToDataSetIds(), detectDataSetIds);
|
||||
hanlpMapResults.addAll(suffixHanlpMapResults);
|
||||
// drop search results whose natures are all entity natures
|
||||
hanlpMapResults = hanlpMapResults.stream().filter(entry -> {
|
||||
List<String> natures = entry.getNatures().stream()
|
||||
.filter(nature -> !nature.endsWith(DictWordType.ENTITY.getType()))
|
||||
.collect(Collectors.toList());
|
||||
if (CollectionUtils.isEmpty(natures)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}).collect(Collectors.toList());
|
||||
MatchText matchText =
|
||||
MatchText.builder().regText(regText).detectSegment(detectSegment).build();
|
||||
regTextMap.put(matchText, hanlpMapResults);
|
||||
|
||||
Reference in New Issue
Block a user