diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/helper/NatureHelper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/helper/NatureHelper.java
index 169555d10..dbea11ec3 100644
--- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/helper/NatureHelper.java
+++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/helper/NatureHelper.java
@@ -141,6 +141,24 @@ public class NatureHelper {
                 && StringUtils.isNumeric(split[1]);
     }
 
+    /**
+     * Checks whether a nature string identifies a term.
+     *
+     * <p>The nature must not be reported empty by {@code StringUtils.isEmpty}, must start with
+     * {@code DictWordType.NATURE_SPILT}, must produce more than one piece when split on that
+     * separator, and must end with {@code DictWordType.TERM.getType()}.
+     *
+     * @param nature the nature string to inspect
+     * @return {@code true} only when every condition above holds
+     */
+    public static boolean isTermNature(String nature) {
+        if (StringUtils.isEmpty(nature) || !nature.startsWith(DictWordType.NATURE_SPILT)) {
+            return false;
+        }
+        // More than one piece must remain after splitting on the separator.
+        boolean hasSeveralParts = nature.split(DictWordType.NATURE_SPILT).length > 1;
+        return hasSeveralParts && nature.endsWith(DictWordType.TERM.getType());
+    }
+
     public static DataSetInfoStat getDataSetStat(List terms) {
         return DataSetInfoStat.builder()
                 .dataSetCount(getDataSetCount(terms))
diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/BaseMapper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/BaseMapper.java
index c03f8f2f6..be968ee4c 100644
--- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/BaseMapper.java
+++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/BaseMapper.java
@@ -10,11 +10,13 @@ import com.tencent.supersonic.headless.core.pojo.QueryContext;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.beans.BeanUtils;
+import org.springframework.util.CollectionUtils;
 
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
@@ -43,7
+45,7 @@ public abstract class BaseMapper implements SchemaMapper {
     }
 
     private void filter(QueryContext queryContext) {
-
+        filterByDataSetId(queryContext);
         switch (queryContext.getQueryDataType()) {
             case TAG:
                 filterByQueryDataType(queryContext, element -> !(element.getIsTag() > 0));
@@ -62,7 +64,25 @@ public abstract class BaseMapper implements SchemaMapper {
             default:
                 break;
         }
+    }
 
+    /**
+     * Drops from the map info every data set that the query context does not ask for.
+     *
+     * <p>When {@code queryContext.getDataSetIds()} is empty no restriction applies and the
+     * matches are left untouched.
+     *
+     * @param queryContext context supplying the requested data set ids and the map info to prune
+     */
+    private static void filterByDataSetId(QueryContext queryContext) {
+        Set<Long> dataSetIds = queryContext.getDataSetIds();
+        if (CollectionUtils.isEmpty(dataSetIds)) {
+            return;
+        }
+        // Prune through the key-set view: calling Map.remove while iterating keySet() throws
+        // ConcurrentModificationException on fail-fast maps such as HashMap.
+        queryContext.getMapInfo().getDataSetElementMatches().keySet()
+                .removeIf(dataSetId -> !dataSetIds.contains(dataSetId));
     }
 
     private static void filterByQueryDataType(QueryContext queryContext, Predicate needRemovePredicate) {
diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/HanlpDictMatchStrategy.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/HanlpDictMatchStrategy.java
index dab46c704..ad9b8ee81 100644
--- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/HanlpDictMatchStrategy.java
+++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/HanlpDictMatchStrategy.java
@@ -2,10 +2,10 @@ package com.tencent.supersonic.headless.core.chat.mapper;
 
 import com.tencent.supersonic.common.pojo.Constants;
 import com.tencent.supersonic.headless.api.pojo.response.S2Term;
-import com.tencent.supersonic.headless.core.config.OptimizationConfig;
-import com.tencent.supersonic.headless.core.pojo.QueryContext;
 import com.tencent.supersonic.headless.core.chat.knowledge.HanlpMapResult;
 import com.tencent.supersonic.headless.core.chat.knowledge.KnowledgeService;
+import com.tencent.supersonic.headless.core.config.OptimizationConfig;
+import
com.tencent.supersonic.headless.core.pojo.QueryContext;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -108,9 +108,12 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy {
         Integer oneDetectionSize = optimizationConfig.getOneDetectionSize();
         List oneRoundResults = hanlpMapResults.stream().limit(oneDetectionSize)
                 .collect(Collectors.toList());
-
         if (CollectionUtils.isNotEmpty(dimensionMetrics)) {
             oneRoundResults = dimensionMetrics;
+            List termOneRoundResults = hanlpMapResults.stream()
+                    .filter(hanlpMapResult -> mapperHelper.existTerms(hanlpMapResult.getNatures()))
+                    .collect(Collectors.toList());
+            oneRoundResults.addAll(termOneRoundResults);
         }
         // step6. select mapResul in one round
         selectResultInOneRound(existResults, oneRoundResults);
diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/MapperHelper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/MapperHelper.java
index 36c686bda..a245a4287 100644
--- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/MapperHelper.java
+++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/MapperHelper.java
@@ -59,6 +59,25 @@ public class MapperHelper {
         return false;
     }
 
+    /**
+     * Reports whether any of the given natures is a term nature.
+     *
+     * @param natures nature strings to scan; a {@code null} list is treated as containing no terms
+     * @return {@code true} as soon as {@code NatureHelper.isTermNature} accepts one of the entries,
+     *         {@code false} otherwise
+     */
+    public boolean existTerms(List<String> natures) {
+        if (natures == null) {
+            return false;
+        }
+        for (String nature : natures) {
+            if (NatureHelper.isTermNature(nature)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     /***
      * get similarity
      * @param detectSegment