diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/BaseMapper.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/BaseMapper.java
new file mode 100644
index 000000000..f87bc52c3
--- /dev/null
+++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/BaseMapper.java
@@ -0,0 +1,58 @@
+package com.tencent.supersonic.chat.mapper;
+
+import com.tencent.supersonic.chat.api.component.SchemaMapper;
+import com.tencent.supersonic.chat.api.pojo.QueryContext;
+import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
+import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Base class for schema mappers: {@link #map(QueryContext)} wraps the subclass-specific
+ * {@link #work(QueryContext)} step with debug logging of the map info before and after it runs.
+ */
+@Slf4j
+public abstract class BaseMapper implements SchemaMapper {
+
+    /**
+     * Runs {@link #work(QueryContext)} and logs {@code queryContext.getMapInfo()} at debug
+     * level, tagged with the concrete mapper's simple class name, before and after the call.
+     *
+     * @param queryContext the query context handed to {@link #work(QueryContext)}
+     */
+    @Override
+    public void map(QueryContext queryContext) {
+
+        String simpleName = this.getClass().getSimpleName();
+
+        log.debug("before {},mapInfo:{}", simpleName, queryContext.getMapInfo());
+
+        work(queryContext);
+
+        log.debug("after {},mapInfo:{}", simpleName, queryContext.getMapInfo());
+    }
+
+    /**
+     * Mapper-specific logic, invoked by {@link #map(QueryContext)}.
+     *
+     * @param queryContext the query context passed to {@link #map(QueryContext)}
+     */
+    public abstract void work(QueryContext queryContext);
+
+    /**
+     * Appends a match to the list registered for {@code modelId}, creating that list first
+     * when the model has no entry yet.
+     *
+     * @param schemaMap          map info whose per-model match lists are updated
+     * @param modelId            id of the model the match belongs to
+     * @param schemaElementMatch the match to append
+     */
+    public void addToSchemaMap(SchemaMapInfo schemaMap, Long modelId, SchemaElementMatch schemaElementMatch) {
+        Map<Long, List<SchemaElementMatch>> modelElementMatches = schemaMap.getModelElementMatches();
+        // computeIfAbsent only allocates a list when the key is missing and returns the list
+        // that is stored in the map, so no follow-up get() is needed.
+        modelElementMatches.computeIfAbsent(modelId, key -> new ArrayList<>()).add(schemaElementMatch);
+    }
+}
diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/EmbeddingMapper.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/EmbeddingMapper.java
new file mode 100644
index 000000000..8db054d4b
--- /dev/null
+++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/EmbeddingMapper.java
@@ -0,0 +1,16 @@
+package com.tencent.supersonic.chat.mapper;
+
+import com.tencent.supersonic.chat.api.pojo.QueryContext;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * A mapper that is capable of semantic understanding of text.
+ */
+@Slf4j
+public class EmbeddingMapper extends BaseMapper {
+
+    @Override
+    public void work(QueryContext queryContext) {
+        // NOTE(review): work() is currently a no-op; this method body contains no mapping logic.
+    }
+}
diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/EntityMapper.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/EntityMapper.java index f5c60bb1d..a9ccf3768 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/EntityMapper.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/EntityMapper.java @@ -1,27 +1,28 @@ package com.tencent.supersonic.chat.mapper; -import com.tencent.supersonic.chat.api.component.SchemaMapper; -import com.tencent.supersonic.chat.api.pojo.QueryContext; -import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo; -import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch; -import com.tencent.supersonic.chat.api.pojo.SchemaElement; -import com.tencent.supersonic.chat.api.pojo.SchemaElementType; import com.tencent.supersonic.chat.api.pojo.ModelSchema; +import com.tencent.supersonic.chat.api.pojo.QueryContext; +import com.tencent.supersonic.chat.api.pojo.SchemaElement; +import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch; +import com.tencent.supersonic.chat.api.pojo.SchemaElementType; +import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo; import com.tencent.supersonic.chat.service.SemanticService; import com.tencent.supersonic.common.util.ContextUtils; +import java.util.List; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.BeanUtils; import org.springframework.util.CollectionUtils; -import java.util.List; -import java.util.stream.Collectors; - +/** + * A mapper capable of converting the VALUE of entity dimension values into ID types.
+ */ @Slf4j -public class EntityMapper implements SchemaMapper { +public class EntityMapper extends BaseMapper { @Override - public void map(QueryContext queryContext) { + public void work(QueryContext queryContext) { SchemaMapInfo schemaMapInfo = queryContext.getMapInfo(); for (Long modelId : schemaMapInfo.getMatchedModels()) { List schemaElementMatchList = schemaMapInfo.getMatchedElements(modelId); @@ -33,7 +34,7 @@ public class EntityMapper implements SchemaMapper { continue; } List valueSchemaElements = schemaElementMatchList.stream().filter(schemaElementMatch -> - SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType())) + SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType())) .collect(Collectors.toList()); for (SchemaElementMatch schemaElementMatch : valueSchemaElements) { if (!entity.getId().equals(schemaElementMatch.getElement().getId())) { @@ -51,7 +52,7 @@ public class EntityMapper implements SchemaMapper { } private boolean checkExistSameEntitySchemaElements(SchemaElementMatch valueSchemaElementMatch, - List schemaElementMatchList) { + List schemaElementMatchList) { List entitySchemaElements = schemaElementMatchList.stream().filter(schemaElementMatch -> SchemaElementType.ENTITY.equals(schemaElementMatch.getElement().getType())) .collect(Collectors.toList()); diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMapper.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMapper.java index cf859e425..bfb22f135 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMapper.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/FuzzyNameMapper.java @@ -1,18 +1,16 @@ package com.tencent.supersonic.chat.mapper; import com.hankcs.hanlp.seg.common.Term; -import com.tencent.supersonic.chat.api.component.SchemaMapper; -import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo; -import com.tencent.supersonic.chat.api.pojo.SchemaElement; 
-import com.tencent.supersonic.chat.api.pojo.SchemaElementType; -import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch; -import com.tencent.supersonic.chat.api.pojo.SemanticSchema; import com.tencent.supersonic.chat.api.pojo.QueryContext; +import com.tencent.supersonic.chat.api.pojo.SchemaElement; +import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch; +import com.tencent.supersonic.chat.api.pojo.SchemaElementType; +import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo; +import com.tencent.supersonic.chat.api.pojo.SemanticSchema; import com.tencent.supersonic.chat.config.OptimizationConfig; import com.tencent.supersonic.common.util.ContextUtils; import com.tencent.supersonic.knowledge.service.SchemaService; import com.tencent.supersonic.knowledge.utils.HanlpHelper; -import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -25,13 +23,14 @@ import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.springframework.util.CollectionUtils; +/*** + * A mapper capable of fuzzy parsing of metric names and dimension names. 
+ */ @Slf4j -public class FuzzyNameMapper implements SchemaMapper { +public class FuzzyNameMapper extends BaseMapper { @Override - public void map(QueryContext queryContext) { - - log.debug("before db mapper,mapInfo:{}", queryContext.getMapInfo()); + public void work(QueryContext queryContext) { List terms = HanlpHelper.getTerms(queryContext.getRequest().getQueryText()); @@ -41,7 +40,6 @@ public class FuzzyNameMapper implements SchemaMapper { detectAndAddToSchema(queryContext, terms, semanticSchema.getMetrics(), SchemaElementType.METRIC); - log.debug("after db mapper,mapInfo:{}", queryContext.getMapInfo()); } private void detectAndAddToSchema(QueryContext queryContext, List terms, List models, @@ -65,7 +63,7 @@ public class FuzzyNameMapper implements SchemaMapper { MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class); Set modelIds = mapperHelper.getModelIds(queryContext.getRequest()); - Double metricDimensionThresholdConfig = getThreshold(queryContext, mapperHelper); + Double metricDimensionThresholdConfig = getThreshold(queryContext); Map> nameToItems = getNameToItems(models); @@ -104,7 +102,7 @@ public class FuzzyNameMapper implements SchemaMapper { return modelResultSet; } - private Double getThreshold(QueryContext queryContext, MapperHelper mapperHelper) { + private Double getThreshold(QueryContext queryContext) { OptimizationConfig optimizationConfig = ContextUtils.getBean(OptimizationConfig.class); Double metricDimensionThresholdConfig = optimizationConfig.getMetricDimensionThresholdConfig(); @@ -150,16 +148,7 @@ public class FuzzyNameMapper implements SchemaMapper { Set schemaElements = entry.getValue(); for (SchemaElement schemaElement : schemaElements) { - List elements = schemaMap.getMatchedElements(schemaElement.getModel()); - if (CollectionUtils.isEmpty(elements)) { - elements = new ArrayList<>(); - schemaMap.setMatchedElements(schemaElement.getModel(), elements); - } - Set regElementSet = elements.stream() - .filter(elementMatch -> 
schemaElementType.equals(elementMatch.getElement().getType())) - .map(elementMatch -> elementMatch.getElement().getId()) - .collect(Collectors.toSet()); - + Set regElementSet = getRegElementSet(schemaMap, schemaElementType, schemaElement); if (regElementSet.contains(schemaElement.getId())) { continue; } @@ -171,9 +160,22 @@ public class FuzzyNameMapper implements SchemaMapper { .similarity(mapperHelper.getSimilarity(detectWord, schemaElement.getName())) .build(); log.info("schemaElementType:{},add to schema, elementMatch {}", schemaElementType, schemaElementMatch); - elements.add(schemaElementMatch); + addToSchemaMap(schemaMap, schemaElement.getModel(), schemaElementMatch); } } } + private Set getRegElementSet(SchemaMapInfo schemaMap, SchemaElementType schemaElementType, + SchemaElement schemaElement) { + List elements = schemaMap.getMatchedElements(schemaElement.getModel()); + if (CollectionUtils.isEmpty(elements)) { + return new HashSet<>(); + } + Set regElementSet = elements.stream() + .filter(elementMatch -> schemaElementType.equals(elementMatch.getElement().getType())) + .map(elementMatch -> elementMatch.getElement().getId()) + .collect(Collectors.toSet()); + return regElementSet; + } + } diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMapper.java b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMapper.java index 561e5561c..63d4fd1d2 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMapper.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/mapper/HanlpDictMapper.java @@ -1,7 +1,6 @@ package com.tencent.supersonic.chat.mapper; import com.hankcs.hanlp.seg.common.Term; -import com.tencent.supersonic.chat.api.component.SchemaMapper; import com.tencent.supersonic.chat.api.pojo.ModelSchema; import com.tencent.supersonic.chat.api.pojo.QueryContext; import com.tencent.supersonic.chat.api.pojo.SchemaElement; @@ -25,18 +24,22 @@ import 
org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.BeanUtils; +/*** + * A mapper capable of prefix and suffix similarity parsing for + * domain names, dimension values, metric names, and dimension names. + */ @Slf4j -public class HanlpDictMapper implements SchemaMapper { +public class HanlpDictMapper extends BaseMapper { @Override - public void map(QueryContext queryContext) { + public void work(QueryContext queryContext) { String queryText = queryContext.getRequest().getQueryText(); List terms = HanlpHelper.getTerms(queryText); QueryMatchStrategy matchStrategy = ContextUtils.getBean(QueryMatchStrategy.class); - MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class); - Set detectModelIds = mapperHelper.getModelIds(queryContext.getRequest()); + + Set detectModelIds = ContextUtils.getBean(MapperHelper.class).getModelIds(queryContext.getRequest()); terms = filterByModelIds(terms, detectModelIds); @@ -47,8 +50,6 @@ public class HanlpDictMapper implements SchemaMapper { HanlpHelper.transLetterOriginal(matches); - log.info("queryContext:{},matches:{}", queryContext, matches); - convertTermsToSchemaMapInfo(matches, queryContext.getMapInfo(), terms); } @@ -121,13 +122,7 @@ public class HanlpDictMapper implements SchemaMapper { .detectWord(mapResult.getDetectWord()) .build(); - Map> modelElementMatches = schemaMap.getModelElementMatches(); - List schemaElementMatches = modelElementMatches.putIfAbsent(modelId, - new ArrayList<>()); - if (schemaElementMatches == null) { - schemaElementMatches = modelElementMatches.get(modelId); - } - schemaElementMatches.add(schemaElementMatch); + addToSchemaMap(schemaMap, modelId, schemaElementMatch); } } } diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/s2ql/LLMRequestService.java b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/s2ql/LLMRequestService.java index a610deffd..72c25a558 100644 --- 
a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/s2ql/LLMRequestService.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/s2ql/LLMRequestService.java @@ -192,7 +192,7 @@ public class LLMRequestService { String dataFormatType = fieldNameToDataFormatType.get(fieldName); if (DataFormatTypeEnum.DECIMAL.getName().equalsIgnoreCase(dataFormatType) || DataFormatTypeEnum.PERCENT.getName().equalsIgnoreCase(dataFormatType)) { - String format = String.format("%s 的字段类型是 %s", fieldName, "小数; "); + String format = String.format("%s的计量单位是%s", fieldName, "小数; "); extraInfoSb.append(format); } }