[improvement][headless] Remove entities from the dictionary and search interface (#1878)

This commit is contained in:
lexluo09
2024-11-04 10:06:10 +08:00
committed by GitHub
parent 1e5bf7909e
commit 3b65b1c80b
6 changed files with 9 additions and 62 deletions

View File

@@ -10,10 +10,9 @@ import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Objects;
/** model word nature */
@Service
@Slf4j
public class ModelWordBuilder extends BaseWordWithAliasBuilder {
public class DataSetWordBuilder extends BaseWordWithAliasBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {

View File

@@ -1,37 +0,0 @@
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Objects;
/**
 * Word builder that produces dictionary entries tagged with the entity nature.
 *
 * <p>Each entry's nature is composed from the schema element's model, its id and the
 * {@code DictWordType.ENTITY} type suffix, and is registered with twice the default frequency.
 */
@Service
@Slf4j
public class EntityWordBuilder extends BaseWordWithAliasBuilder {

    /**
     * Builds the dictionary words for the given word and schema element.
     *
     * @param word the text to register
     * @param schemaElement the element the word belongs to; may be {@code null}
     * @return an empty list when {@code schemaElement} is {@code null}; otherwise the word
     *         itself followed by the entries returned by {@code getOneWordNatureAlias}
     */
    @Override
    public List<DictWord> doGet(String word, SchemaElement schemaElement) {
        List<DictWord> words = Lists.newArrayList();
        if (Objects.nonNull(schemaElement)) {
            DictWord primary = getOneWordNature(word, schemaElement, false);
            words.add(primary);
            List<DictWord> aliases = getOneWordNatureAlias(schemaElement, false);
            words.addAll(aliases);
        }
        return words;
    }

    /**
     * Creates a single dictionary word carrying the entity nature of the schema element.
     *
     * @param word the text to register
     * @param schemaElement the element supplying the model and id used in the nature string
     * @param isSuffix not consulted by this implementation
     * @return a {@code DictWord} whose nature-with-frequency is the nature string, a space,
     *         and {@code DEFAULT_FREQUENCY * 2}
     */
    @Override
    public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
        // Nature layout: <split><model><split><id><entity type suffix>
        String modelSegment = DictWordType.NATURE_SPILT + schemaElement.getModel();
        String idSegment = DictWordType.NATURE_SPILT + schemaElement.getId();
        String nature = modelSegment + idSegment + DictWordType.ENTITY.getType();

        // The doubled default frequency is baked into the format pattern itself.
        String natureWithFrequency = String.format("%s " + DEFAULT_FREQUENCY * 2, nature);

        DictWord entry = new DictWord();
        entry.setWord(word);
        entry.setNatureWithFrequency(natureWithFrequency);
        return entry;
    }
}

View File

@@ -13,8 +13,7 @@ public class WordBuilderFactory {
static {
wordNatures.put(DictWordType.DIMENSION, new DimensionWordBuilder());
wordNatures.put(DictWordType.METRIC, new MetricWordBuilder());
wordNatures.put(DictWordType.DATASET, new ModelWordBuilder());
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
wordNatures.put(DictWordType.DATASET, new DataSetWordBuilder());
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
wordNatures.put(DictWordType.TERM, new TermWordBuilder());
}

View File

@@ -18,7 +18,9 @@ import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
/** nature parse helper */
/**
* nature parse helper
*/
@Slf4j
public class NatureHelper {
@@ -50,10 +52,9 @@ public class NatureHelper {
return result;
}
private static boolean isDataSetOrEntity(S2Term term, Integer model) {
private static boolean isDataSet(S2Term term, Integer model) {
String natureStr = term.nature.toString();
return (DictWordType.NATURE_SPILT + model).equals(natureStr)
|| natureStr.endsWith(DictWordType.ENTITY.getType());
return (DictWordType.NATURE_SPILT + model).equals(natureStr);
}
public static Integer getDataSetByNature(Nature nature) {
@@ -119,8 +120,8 @@ public class NatureHelper {
}
private static long getDataSetCount(List<S2Term> terms) {
return terms.stream()
.filter(term -> isDataSetOrEntity(term, getDataSetByNature(term.nature))).count();
return terms.stream().filter(term -> isDataSet(term, getDataSetByNature(term.nature)))
.count();
}
private static long getDimensionValueCount(List<S2Term> terms) {

View File

@@ -1,13 +1,11 @@
package com.tencent.supersonic.headless.chat.mapper;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.chat.ChatQueryContext;
import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.chat.knowledge.KnowledgeBaseService;
import com.tencent.supersonic.headless.chat.knowledge.SearchService;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@@ -17,7 +15,6 @@ import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
/**
* SearchMatchStrategy encapsulates a concrete matching algorithm executed during search process.
@@ -66,16 +63,6 @@ public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
knowledgeBaseService.suffixSearch(detectSegment, SEARCH_SIZE,
chatQueryContext.getModelIdToDataSetIds(), detectDataSetIds);
hanlpMapResults.addAll(suffixHanlpMapResults);
// remove entity name where search
hanlpMapResults = hanlpMapResults.stream().filter(entry -> {
List<String> natures = entry.getNatures().stream()
.filter(nature -> !nature.endsWith(DictWordType.ENTITY.getType()))
.collect(Collectors.toList());
if (CollectionUtils.isEmpty(natures)) {
return false;
}
return true;
}).collect(Collectors.toList());
MatchText matchText =
MatchText.builder().regText(regText).detectSegment(detectSegment).build();
regTextMap.put(matchText, hanlpMapResults);