mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-12 12:37:55 +00:00
[improvement][chat]Introduce AllFieldMapper to increase parsing robustness when normal pipeline fails.
[improvement][chat]Introduce `AllFieldMapper` to increase parsing robustness when normal pipeline fails.
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
package com.tencent.supersonic.headless.chat.mapper;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.headless.api.pojo.DataSetSchema;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.headless.api.pojo.enums.MapModeEnum;
|
||||
import com.tencent.supersonic.headless.chat.ChatQueryContext;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Slf4j
|
||||
public class AllFieldMapper extends BaseMapper {
|
||||
|
||||
@Override
|
||||
public boolean accept(ChatQueryContext chatQueryContext) {
|
||||
return MapModeEnum.ALL.equals(chatQueryContext.getRequest().getMapModeEnum());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doMap(ChatQueryContext chatQueryContext) {
|
||||
Map<Long, DataSetSchema> schemaMap =
|
||||
chatQueryContext.getSemanticSchema().getDataSetSchemaMap();
|
||||
for (Map.Entry<Long, DataSetSchema> entry : schemaMap.entrySet()) {
|
||||
List<SchemaElement> schemaElements = Lists.newArrayList();
|
||||
schemaElements.addAll(entry.getValue().getDimensions());
|
||||
schemaElements.addAll(entry.getValue().getMetrics());
|
||||
|
||||
for (SchemaElement schemaElement : schemaElements) {
|
||||
chatQueryContext.getMapInfo().getMatchedElements(entry.getKey())
|
||||
.add(SchemaElementMatch.builder().word(schemaElement.getName())
|
||||
.element(schemaElement).detectWord(schemaElement.getName())
|
||||
.similarity(1.0).build());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -27,6 +27,10 @@ public abstract class BaseMapper implements SchemaMapper {
|
||||
|
||||
@Override
|
||||
public void map(ChatQueryContext chatQueryContext) {
|
||||
if (!accept(chatQueryContext)) {
|
||||
return;
|
||||
}
|
||||
|
||||
String simpleName = this.getClass().getSimpleName();
|
||||
long startTime = System.currentTimeMillis();
|
||||
log.debug("before {},mapInfo:{}", simpleName,
|
||||
@@ -46,6 +50,10 @@ public abstract class BaseMapper implements SchemaMapper {
|
||||
|
||||
public abstract void doMap(ChatQueryContext chatQueryContext);
|
||||
|
||||
/**
 * Hook that {@code map} consults before doing any mapping work; when it returns {@code false},
 * {@code map} returns immediately. This default implementation accepts every query, and
 * subclasses override it to opt out.
 *
 * @param chatQueryContext the query context about to be mapped
 * @return {@code true} if this mapper should process the query
 */
protected boolean accept(ChatQueryContext chatQueryContext) {
|
||||
return true;
|
||||
}
|
||||
|
||||
public void addToSchemaMap(SchemaMapInfo schemaMap, Long dataSetId,
|
||||
SchemaElementMatch newElementMatch) {
|
||||
Map<Long, List<SchemaElementMatch>> dataSetElementMatches =
|
||||
|
||||
@@ -20,12 +20,13 @@ import java.util.Objects;
|
||||
*/
|
||||
@Slf4j
|
||||
public class EmbeddingMapper extends BaseMapper {
|
||||
public void doMap(ChatQueryContext chatQueryContext) {
|
||||
// Check if the map mode is LOOSE
|
||||
if (!MapModeEnum.LOOSE.equals(chatQueryContext.getRequest().getMapModeEnum())) {
|
||||
return;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(ChatQueryContext chatQueryContext) {
|
||||
return MapModeEnum.LOOSE.equals(chatQueryContext.getRequest().getMapModeEnum());
|
||||
}
|
||||
|
||||
public void doMap(ChatQueryContext chatQueryContext) {
|
||||
// 1. Query from embedding by queryText
|
||||
EmbeddingMatchStrategy matchStrategy = ContextUtils.getBean(EmbeddingMatchStrategy.class);
|
||||
List<EmbeddingResult> matchResults = getMatches(chatQueryContext, matchStrategy);
|
||||
@@ -62,4 +63,5 @@ public class EmbeddingMapper extends BaseMapper {
|
||||
addToSchemaMap(chatQueryContext.getMapInfo(), dataSetId, schemaElementMatch);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -35,11 +35,11 @@ public class MapperConfig extends ParameterConfig {
|
||||
"维度值相似度阈值在动态调整中的最低值", "number", "Mapper相关配置");
|
||||
|
||||
public static final Parameter EMBEDDING_MAPPER_TEXT_SIZE =
|
||||
new Parameter("s2.mapper.embedding.word.size", "4", "用于向量召回文本长度",
|
||||
new Parameter("s2.mapper.embedding.word.size", "3", "用于向量召回文本长度",
|
||||
"为提高向量召回效率, 按指定长度进行向量语义召回", "number", "Mapper相关配置");
|
||||
|
||||
public static final Parameter EMBEDDING_MAPPER_TEXT_STEP =
|
||||
new Parameter("s2.mapper.embedding.word.step", "3", "向量召回文本每步长度",
|
||||
new Parameter("s2.mapper.embedding.word.step", "2", "向量召回文本每步长度",
|
||||
"为提高向量召回效率, 按指定每步长度进行召回", "number", "Mapper相关配置");
|
||||
|
||||
public static final Parameter EMBEDDING_MAPPER_BATCH =
|
||||
@@ -51,7 +51,7 @@ public class MapperConfig extends ParameterConfig {
|
||||
"每个文本进行向量语义召回的文本结果个数", "number", "Mapper相关配置");
|
||||
|
||||
public static final Parameter EMBEDDING_MAPPER_THRESHOLD =
|
||||
new Parameter("s2.mapper.embedding.threshold", "0.98", "向量召回相似度阈值", "相似度小于该阈值的则舍弃",
|
||||
new Parameter("s2.mapper.embedding.threshold", "0.8", "向量召回相似度阈值", "相似度小于该阈值的则舍弃",
|
||||
"number", "Mapper相关配置");
|
||||
|
||||
public static final Parameter EMBEDDING_MAPPER_ROUND_NUMBER =
|
||||
|
||||
@@ -9,22 +9,23 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
public class TimeFieldMapper extends BaseMapper {
|
||||
public class PartitionTimeMapper extends BaseMapper {
|
||||
|
||||
@Override
|
||||
public boolean accept(ChatQueryContext chatQueryContext) {
|
||||
return !(chatQueryContext.getRequest().getText2SQLType().equals(Text2SQLType.ONLY_RULE)
|
||||
|| chatQueryContext.getMapInfo().isEmpty());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doMap(ChatQueryContext chatQueryContext) {
|
||||
if (chatQueryContext.getRequest().getText2SQLType().equals(Text2SQLType.ONLY_RULE)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<Long, DataSetSchema> schemaMap =
|
||||
chatQueryContext.getSemanticSchema().getDataSetSchemaMap();
|
||||
for (Map.Entry<Long, DataSetSchema> entry : schemaMap.entrySet()) {
|
||||
List<SchemaElement> timeDims = entry.getValue().getDimensions().stream()
|
||||
.filter(dim -> dim.getTimeFormat() != null).collect(Collectors.toList());
|
||||
.filter(SchemaElement::isPartitionTime).toList();
|
||||
for (SchemaElement schemaElement : timeDims) {
|
||||
chatQueryContext.getMapInfo().getMatchedElements(entry.getKey())
|
||||
.add(SchemaElementMatch.builder().word(schemaElement.getName())
|
||||
@@ -21,14 +21,16 @@ import java.util.stream.Collectors;
|
||||
@Slf4j
|
||||
public class QueryFilterMapper extends BaseMapper {
|
||||
|
||||
private double similarity = 1.0;
|
||||
private final double similarity = 1.0;
|
||||
|
||||
@Override
|
||||
public boolean accept(ChatQueryContext chatQueryContext) {
|
||||
return !chatQueryContext.getRequest().getDataSetIds().isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doMap(ChatQueryContext chatQueryContext) {
|
||||
Set<Long> dataSetIds = chatQueryContext.getRequest().getDataSetIds();
|
||||
if (CollectionUtils.isEmpty(dataSetIds)) {
|
||||
return;
|
||||
}
|
||||
SchemaMapInfo schemaMapInfo = chatQueryContext.getMapInfo();
|
||||
clearOtherSchemaElementMatch(dataSetIds, schemaMapInfo);
|
||||
for (Long dataSetId : dataSetIds) {
|
||||
|
||||
@@ -16,14 +16,15 @@ import java.util.List;
|
||||
@Slf4j
|
||||
public class TermDescMapper extends BaseMapper {
|
||||
|
||||
@Override
|
||||
public boolean accept(ChatQueryContext chatQueryContext) {
|
||||
return !(CollectionUtils.isEmpty(chatQueryContext.getMapInfo().getTermDescriptionToMap())
|
||||
|| chatQueryContext.getRequest().isDescriptionMapped());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doMap(ChatQueryContext chatQueryContext) {
|
||||
SchemaMapInfo mapInfo = chatQueryContext.getMapInfo();
|
||||
List<SchemaElement> termElements = mapInfo.getTermDescriptionToMap();
|
||||
if (CollectionUtils.isEmpty(termElements)
|
||||
|| chatQueryContext.getRequest().isDescriptionMapped()) {
|
||||
return;
|
||||
}
|
||||
List<SchemaElement> termElements = chatQueryContext.getMapInfo().getTermDescriptionToMap();
|
||||
for (SchemaElement schemaElement : termElements) {
|
||||
ChatQueryContext queryCtx =
|
||||
buildQueryContext(chatQueryContext, schemaElement.getDescription());
|
||||
|
||||
Reference in New Issue
Block a user