(improvement)(headless) take only M dimensionValue or N metric/dimension per round. (#1032)

This commit is contained in:
lexluo09
2024-05-26 23:11:48 +08:00
committed by GitHub
parent 822879cd7b
commit 1fcd880042
5 changed files with 36 additions and 35 deletions

View File

@@ -6,12 +6,6 @@ import com.tencent.supersonic.headless.core.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.core.chat.knowledge.KnowledgeBaseService;
import com.tencent.supersonic.headless.core.config.OptimizationConfig;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
@@ -19,6 +13,11 @@ import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
/**
* HanlpDictMatchStrategy uses <a href="https://www.hanlp.com/">HanLP</a> to
@@ -40,7 +39,7 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
@Override
public Map<MatchText, List<HanlpMapResult>> match(QueryContext queryContext, List<S2Term> terms,
Set<Long> detectDataSetIds) {
Set<Long> detectDataSetIds) {
String text = queryContext.getQueryText();
if (Objects.isNull(terms) || StringUtils.isEmpty(text)) {
return null;
@@ -62,7 +61,7 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
}
public void detectByStep(QueryContext queryContext, Set<HanlpMapResult> existResults, Set<Long> detectDataSetIds,
String detectSegment, int offset) {
String detectSegment, int offset) {
// step1. pre search
Integer oneDetectionMaxSize = optimizationConfig.getOneDetectionMaxSize();
LinkedHashSet<HanlpMapResult> hanlpMapResults = knowledgeBaseService.prefixSearch(detectSegment,
@@ -97,19 +96,19 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
return parseResult;
}).collect(Collectors.toCollection(LinkedHashSet::new));
// step5. take only one dimension or 10 metric/dimension value per round.
List<HanlpMapResult> dimensionMetrics = hanlpMapResults.stream()
// step5. take only M dimensionValue or N metric/dimension per round.
List<HanlpMapResult> dimensionValues = hanlpMapResults.stream()
.filter(entry -> mapperHelper.existDimensionValues(entry.getNatures()))
.collect(Collectors.toList())
.stream()
.limit(1)
.limit(optimizationConfig.getOneDetectionDimensionValueSize())
.collect(Collectors.toList());
Integer oneDetectionSize = optimizationConfig.getOneDetectionSize();
List<HanlpMapResult> oneRoundResults = hanlpMapResults.stream().limit(oneDetectionSize)
.collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(dimensionMetrics)) {
oneRoundResults = dimensionMetrics;
// if any dimension values matched, keep them for this round and also collect the term matches
if (CollectionUtils.isNotEmpty(dimensionValues)) {
oneRoundResults = dimensionValues;
List<HanlpMapResult> termOneRoundResults = hanlpMapResults.stream()
.filter(hanlpMapResult -> mapperHelper.existTerms(hanlpMapResult.getNatures()))
.collect(Collectors.toList());

View File

@@ -16,10 +16,12 @@ public class OptimizationConfig {
/**
 * Bound to property {@code s2.one.detection.size}; falls back to 8 when the property is not set.
 * HanlpDictMatchStrategy uses it as the limit on how many results are kept for one detection round.
 */
@Value("${s2.one.detection.size:8}")
private Integer oneDetectionSize;
/**
 * Bound to property {@code s2.one.detection.max.size}; falls back to 20 when the property is not set.
 * NOTE(review): read just before the knowledge-base prefix search; presumably the cap on
 * candidates fetched per search — confirm against the caller.
 */
@Value("${s2.one.detection.max.size:20}")
private Integer oneDetectionMaxSize;
/**
 * Bound to property {@code s2.one.detection.dimensionValue.size}; falls back to 1 when the
 * property is not set. HanlpDictMatchStrategy uses it as the limit on how many dimension-value
 * matches are kept for one detection round.
 */
@Value("${s2.one.detection.dimensionValue.size:1}")
private Integer oneDetectionDimensionValueSize;
/**
 * Bound to property {@code s2.metric.dimension.min.threshold}; falls back to 0.3 when the
 * property is not set.
 * NOTE(review): presumably a minimum similarity threshold for metric/dimension matching — its
 * use is not visible here; verify.
 */
@Value("${s2.metric.dimension.min.threshold:0.3}")
private Double metricDimensionMinThresholdConfig;