[improvement][chat]Introduce a parameter to control the number of semantic fields that should be sent to the LLM.
Some checks are pending
supersonic CentOS CI / build (21) (push) Waiting to run
supersonic mac CI / build (21) (push) Waiting to run
supersonic ubuntu CI / build (21) (push) Waiting to run
supersonic windows CI / build (21) (push) Waiting to run

This commit is contained in:
jerryjzhang
2025-01-03 22:16:24 +08:00
parent 43140e695b
commit 22f6190e7c
3 changed files with 22 additions and 10 deletions

View File

@@ -28,12 +28,13 @@ public class AllFieldMapper extends BaseMapper {
schemaElements.addAll(entry.getValue().getDimensions());
schemaElements.addAll(entry.getValue().getMetrics());
List<SchemaElementMatch> allMatches = Lists.newArrayList();
for (SchemaElement schemaElement : schemaElements) {
chatQueryContext.getMapInfo().getMatchedElements(entry.getKey())
.add(SchemaElementMatch.builder().word(schemaElement.getName())
.element(schemaElement).detectWord(schemaElement.getName())
.similarity(1.0).build());
allMatches.add(SchemaElementMatch.builder().word(schemaElement.getName())
.element(schemaElement).detectWord(schemaElement.getName())
.similarity(0.1).build());
}
chatQueryContext.getMapInfo().setMatchedElements(entry.getKey(), allMatches);
}
}

View File

@@ -49,9 +49,13 @@ public class ParserConfig extends ParameterConfig {
/**
 * Parameter registered under the key "s2.parser.show.count" with the value "3".
 * Its text strings read "number of parse results displayed" and
 * "number of parses shown by the front end".
 * NOTE(review): the meaning of each constructor argument (presumably name, default value,
 * comment, description, data type, module) is not visible here; confirm against Parameter.
 */
public static final Parameter PARSER_SHOW_COUNT =
new Parameter("s2.parser.show.count", "3", "解析结果展示个数", "前端展示的解析个数", "number", "语义解析配置");
/**
 * Parameter registered under the key "s2.parser.field.count.threshold" with the value "3".
 * Its text strings read "semantic field count threshold" and "if the mapped fields are fewer
 * than this threshold, all fields of the data set are fed to the LLM".
 * NOTE(review): the description says "fewer than", while the visible consumer in
 * LLMRequestService compares the matched-element count with {@code <=}; confirm which
 * boundary is intended and align the text or the comparison.
 */
public static final Parameter PARSER_FIELDS_COUNT_THRESHOLD =
new Parameter("s2.parser.field.count.threshold", "3", "语义字段个数阈值",
"如果映射字段小于该阈值则将数据集所有字段输入LLM", "number", "语义解析配置");
/**
 * Returns a new list, built with {@code Lists.newArrayList}, of the parser parameters
 * enumerated in the call below.
 *
 * NOTE(review): the last two argument lines look like the removed and added sides of a diff
 * whose +/- markers were lost. As written, the statement ends at the first {@code );} and the
 * following line does not parse. Presumably only the line that also lists
 * PARSER_FIELDS_COUNT_THRESHOLD should remain; confirm against the repository.
 *
 * @return the list of parameters passed to {@code Lists.newArrayList}
 */
@Override
public List<Parameter> getSysParameters() {
return Lists.newArrayList(PARSER_LINKING_VALUE_ENABLE, PARSER_FEW_SHOT_NUMBER,
PARSER_SELF_CONSISTENCY_NUMBER, PARSER_SHOW_COUNT);
PARSER_SELF_CONSISTENCY_NUMBER, PARSER_SHOW_COUNT, PARSER_FIELDS_COUNT_THRESHOLD);
}
}

View File

@@ -24,8 +24,7 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_LINKING_VALUE_ENABLE;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_STRATEGY_TYPE;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.*;
@Slf4j
@Service
@@ -43,15 +42,23 @@ public class LLMRequestService {
Map<Long, String> dataSetIdToName = queryCtx.getSemanticSchema().getDataSetIdToName();
String queryText = queryCtx.getRequest().getQueryText();
LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema();
int fieldCntThreshold =
Integer.valueOf(parserConfig.getParameterValue(PARSER_FIELDS_COUNT_THRESHOLD));
if (queryCtx.getMapInfo().getMatchedElements(dataSetId).size() <= fieldCntThreshold) {
llmSchema.setMetrics(queryCtx.getSemanticSchema().getMetrics());
llmSchema.setDimensions(queryCtx.getSemanticSchema().getDimensions());
} else {
llmSchema.setMetrics(getMappedMetrics(queryCtx, dataSetId));
llmSchema.setDimensions(getMappedDimensions(queryCtx, dataSetId));
}
LLMReq llmReq = new LLMReq();
llmReq.setQueryText(queryText);
LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema();
llmReq.setSchema(llmSchema);
llmSchema.setDatabaseType(getDatabaseType(queryCtx, dataSetId));
llmSchema.setDataSetId(dataSetId);
llmSchema.setDataSetName(dataSetIdToName.get(dataSetId));
llmSchema.setMetrics(getMappedMetrics(queryCtx, dataSetId));
llmSchema.setDimensions(getMappedDimensions(queryCtx, dataSetId));
llmSchema.setPartitionTime(getPartitionTime(queryCtx, dataSetId));
llmSchema.setPrimaryKey(getPrimaryKey(queryCtx, dataSetId));