mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-12 12:37:55 +00:00
feat:Support kyuubi presto trino (#2109)
This commit is contained in:
@@ -72,7 +72,8 @@ public class EmbeddingMatchStrategy extends BatchMatchStrategy<EmbeddingResult>
|
||||
// 1. Base detection
|
||||
List<EmbeddingResult> baseResults = super.detect(chatQueryContext, terms, detectDataSetIds);
|
||||
|
||||
boolean useLLM = Boolean.parseBoolean(mapperConfig.getParameterValue(EMBEDDING_MAPPER_USE_LLM));
|
||||
boolean useLLM =
|
||||
Boolean.parseBoolean(mapperConfig.getParameterValue(EMBEDDING_MAPPER_USE_LLM));
|
||||
|
||||
// 2. LLM enhanced detection
|
||||
if (useLLM) {
|
||||
@@ -115,7 +116,8 @@ public class EmbeddingMatchStrategy extends BatchMatchStrategy<EmbeddingResult>
|
||||
* Extract valid word segments by filtering out unwanted word natures
|
||||
*/
|
||||
private Set<String> extractValidSegments(String text) {
|
||||
List<String> natureList = Arrays.asList(StringUtils.split(mapperConfig.getParameterValue(EMBEDDING_MAPPER_ALLOWED_SEGMENT_NATURE ), ","));
|
||||
List<String> natureList = Arrays.asList(StringUtils.split(
|
||||
mapperConfig.getParameterValue(EMBEDDING_MAPPER_ALLOWED_SEGMENT_NATURE), ","));
|
||||
return HanlpHelper.getSegment().seg(text).stream()
|
||||
.filter(t -> natureList.stream().noneMatch(nature -> t.nature.startsWith(nature)))
|
||||
.map(Term::getWord).collect(Collectors.toSet());
|
||||
|
||||
@@ -61,7 +61,8 @@ public class MapFilter {
|
||||
List<SchemaElementMatch> value = entry.getValue();
|
||||
if (!CollectionUtils.isEmpty(value)) {
|
||||
value.removeIf(schemaElementMatch -> StringUtils
|
||||
.length(schemaElementMatch.getDetectWord()) <= 1 && !schemaElementMatch.isLlmMatched());
|
||||
.length(schemaElementMatch.getDetectWord()) <= 1
|
||||
&& !schemaElementMatch.isLlmMatched());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -80,7 +81,7 @@ public class MapFilter {
|
||||
}
|
||||
|
||||
public static void filterByQueryDataType(ChatQueryContext chatQueryContext,
|
||||
Predicate<SchemaElement> needRemovePredicate) {
|
||||
Predicate<SchemaElement> needRemovePredicate) {
|
||||
Map<Long, List<SchemaElementMatch>> dataSetElementMatches =
|
||||
chatQueryContext.getMapInfo().getDataSetElementMatches();
|
||||
for (Map.Entry<Long, List<SchemaElementMatch>> entry : dataSetElementMatches.entrySet()) {
|
||||
|
||||
@@ -63,6 +63,6 @@ public class MapperConfig extends ParameterConfig {
|
||||
"embedding的结果再通过一次LLM来筛选,这时候忽略各个向量阀值", "bool", "Mapper相关配置");
|
||||
|
||||
public static final Parameter EMBEDDING_MAPPER_ALLOWED_SEGMENT_NATURE =
|
||||
new Parameter("s2.mapper.embedding.allowed-segment-nature", "['v', 'd', 'a']", "使用LLM召回二次处理时对问题分词词性的控制",
|
||||
"分词后允许的词性才会进行向量召回", "list", "Mapper相关配置");
|
||||
new Parameter("s2.mapper.embedding.allowed-segment-nature", "['v', 'd', 'a']",
|
||||
"使用LLM召回二次处理时对问题分词词性的控制", "分词后允许的词性才会进行向量召回", "list", "Mapper相关配置");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user