From 48ccbc57b9b98c5f98834260f9451fc168432be7 Mon Sep 17 00:00:00 2001 From: lexluo09 <39718951+lexluo09@users.noreply.github.com> Date: Sun, 26 May 2024 23:28:31 +0800 Subject: [PATCH] (improvement)(headless)in the mapper phase, prioritize retrieving the configured top N dimension values. (#1033) --- .../chat/mapper/HanlpDictMatchStrategy.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/HanlpDictMatchStrategy.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/HanlpDictMatchStrategy.java index f76a237d4..20171fc0c 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/HanlpDictMatchStrategy.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/HanlpDictMatchStrategy.java @@ -6,6 +6,7 @@ import com.tencent.supersonic.headless.core.chat.knowledge.HanlpMapResult; import com.tencent.supersonic.headless.core.chat.knowledge.KnowledgeBaseService; import com.tencent.supersonic.headless.core.config.OptimizationConfig; import com.tencent.supersonic.headless.core.pojo.QueryContext; +import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; @@ -96,23 +97,27 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy { return parseResult; }).collect(Collectors.toCollection(LinkedHashSet::new)); - // step5. take only M dimensionValue or N metric/dimension per rond. + // step5. take only M dimensionValue or N-M metric/dimension value per round. 
List dimensionValues = hanlpMapResults.stream() .filter(entry -> mapperHelper.existDimensionValues(entry.getNatures())) .limit(optimizationConfig.getOneDetectionDimensionValueSize()) .collect(Collectors.toList()); Integer oneDetectionSize = optimizationConfig.getOneDetectionSize(); - List oneRoundResults = hanlpMapResults.stream().limit(oneDetectionSize) - .collect(Collectors.toList()); + List oneRoundResults = new ArrayList<>(); - // add the dimensionValue/term if it exists dimensionValue + // add the dimensionValue if it exists if (CollectionUtils.isNotEmpty(dimensionValues)) { - oneRoundResults = dimensionValues; - List termOneRoundResults = hanlpMapResults.stream() - .filter(hanlpMapResult -> mapperHelper.existTerms(hanlpMapResult.getNatures())) + oneRoundResults.addAll(dimensionValues); + } + // fill the rest of the list with other results, excluding the dimensionValue if it was added + if (oneRoundResults.size() < oneDetectionSize) { + List additionalResults = hanlpMapResults.stream() + .filter(entry -> !mapperHelper.existDimensionValues(entry.getNatures()) + || !oneRoundResults.contains(entry)) + .limit(oneDetectionSize - oneRoundResults.size()) .collect(Collectors.toList()); - oneRoundResults.addAll(termOneRoundResults); + oneRoundResults.addAll(additionalResults); } // step6. select mapResul in one round selectResultInOneRound(existResults, oneRoundResults);