feat:Support kyuubi presto trino (#2109)

This commit is contained in:
zyclove
2025-02-26 17:33:14 +08:00
committed by GitHub
parent 11ff99cdbe
commit 5e3bafb953
31 changed files with 501 additions and 101 deletions

View File

@@ -72,7 +72,8 @@ public class EmbeddingMatchStrategy extends BatchMatchStrategy<EmbeddingResult>
// 1. Base detection
List<EmbeddingResult> baseResults = super.detect(chatQueryContext, terms, detectDataSetIds);
boolean useLLM = Boolean.parseBoolean(mapperConfig.getParameterValue(EMBEDDING_MAPPER_USE_LLM));
boolean useLLM =
Boolean.parseBoolean(mapperConfig.getParameterValue(EMBEDDING_MAPPER_USE_LLM));
// 2. LLM enhanced detection
if (useLLM) {
@@ -115,7 +116,8 @@ public class EmbeddingMatchStrategy extends BatchMatchStrategy<EmbeddingResult>
* Extract valid word segments by filtering out unwanted word natures
*/
private Set<String> extractValidSegments(String text) {
List<String> natureList = Arrays.asList(StringUtils.split(mapperConfig.getParameterValue(EMBEDDING_MAPPER_ALLOWED_SEGMENT_NATURE ), ","));
List<String> natureList = Arrays.asList(StringUtils.split(
mapperConfig.getParameterValue(EMBEDDING_MAPPER_ALLOWED_SEGMENT_NATURE), ","));
return HanlpHelper.getSegment().seg(text).stream()
.filter(t -> natureList.stream().noneMatch(nature -> t.nature.startsWith(nature)))
.map(Term::getWord).collect(Collectors.toSet());

View File

@@ -61,7 +61,8 @@ public class MapFilter {
List<SchemaElementMatch> value = entry.getValue();
if (!CollectionUtils.isEmpty(value)) {
value.removeIf(schemaElementMatch -> StringUtils
.length(schemaElementMatch.getDetectWord()) <= 1 && !schemaElementMatch.isLlmMatched());
.length(schemaElementMatch.getDetectWord()) <= 1
&& !schemaElementMatch.isLlmMatched());
}
}
}
@@ -80,7 +81,7 @@ public class MapFilter {
}
public static void filterByQueryDataType(ChatQueryContext chatQueryContext,
Predicate<SchemaElement> needRemovePredicate) {
Predicate<SchemaElement> needRemovePredicate) {
Map<Long, List<SchemaElementMatch>> dataSetElementMatches =
chatQueryContext.getMapInfo().getDataSetElementMatches();
for (Map.Entry<Long, List<SchemaElementMatch>> entry : dataSetElementMatches.entrySet()) {

View File

@@ -63,6 +63,6 @@ public class MapperConfig extends ParameterConfig {
"embedding的结果再通过一次LLM来筛选这时候忽略各个向量阀值", "bool", "Mapper相关配置");
public static final Parameter EMBEDDING_MAPPER_ALLOWED_SEGMENT_NATURE =
new Parameter("s2.mapper.embedding.allowed-segment-nature", "['v', 'd', 'a']", "使用LLM召回二次处理时对问题分词词性的控制",
"分词后允许的词性才会进行向量召回", "list", "Mapper相关配置");
new Parameter("s2.mapper.embedding.allowed-segment-nature", "['v', 'd', 'a']",
"使用LLM召回二次处理时对问题分词词性的控制", "分词后允许的词性才会进行向量召回", "list", "Mapper相关配置");
}