(improvement)(chat) Optimize mapper code by adding unified logging, unified comments, and LLM prior knowledge optimization (#309)

This commit is contained in:
lexluo09
2023-11-02 16:55:25 +08:00
committed by GitHub
parent bfac71a7d0
commit f4e3922f47
6 changed files with 108 additions and 53 deletions

View File

@@ -0,0 +1,41 @@
package com.tencent.supersonic.chat.mapper;
import com.tencent.supersonic.chat.api.component.SchemaMapper;
import com.tencent.supersonic.chat.api.pojo.QueryContext;
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
/**
 * Base class for schema mappers.
 *
 * <p>{@link #map(QueryContext)} wraps the subclass-specific {@link #work(QueryContext)} step with
 * debug logging of the query context's map info before and after it runs, so concrete mappers
 * only need to implement {@code work}.
 */
@Slf4j
public abstract class BaseMapper implements SchemaMapper {

    /**
     * Runs {@link #work(QueryContext)} and logs the map info at debug level before and after it,
     * tagged with the simple class name of the concrete mapper.
     *
     * @param queryContext the query context handed on to {@code work}
     */
    @Override
    public void map(QueryContext queryContext) {
        String simpleName = this.getClass().getSimpleName();
        log.debug("before {},mapInfo:{}", simpleName, queryContext.getMapInfo());
        work(queryContext);
        log.debug("after {},mapInfo:{}", simpleName, queryContext.getMapInfo());
    }

    /**
     * Mapper-specific step invoked by {@link #map(QueryContext)}.
     *
     * @param queryContext the query context to operate on
     */
    public abstract void work(QueryContext queryContext);

    /**
     * Appends a match to the list registered for the given model, creating that list on first use.
     *
     * @param schemaMap          the map info whose per-model match lists are updated
     * @param modelId            key of the list the match is appended to
     * @param schemaElementMatch the match to append
     */
    public void addToSchemaMap(SchemaMapInfo schemaMap, Long modelId, SchemaElementMatch schemaElementMatch) {
        Map<Long, List<SchemaElementMatch>> modelElementMatches = schemaMap.getModelElementMatches();
        // computeIfAbsent only allocates a list when the model has none yet and hands back the
        // list that is actually stored in the map, so no second lookup is needed.
        modelElementMatches.computeIfAbsent(modelId, key -> new ArrayList<>()).add(schemaElementMatch);
    }
}

View File

@@ -0,0 +1,16 @@
package com.tencent.supersonic.chat.mapper;
import com.tencent.supersonic.chat.api.pojo.QueryContext;
import lombok.extern.slf4j.Slf4j;
/**
* A mapper that is capable of semantic understanding of text.
* NOTE(review): the mapping step below has an empty body, so this class currently performs no
* mapping of its own — presumably a placeholder for an embedding-based implementation; confirm.
*/
@Slf4j
public class EmbeddingMapper extends BaseMapper {
/**
* Mapping step of this mapper; the body is empty, so calling it does nothing.
*
* @param queryContext the query context (not read or modified here)
*/
@Override
public void work(QueryContext queryContext) {
// No mapping logic is implemented here.
}
}

View File

@@ -1,27 +1,28 @@
package com.tencent.supersonic.chat.mapper;
import com.tencent.supersonic.chat.api.component.SchemaMapper;
import com.tencent.supersonic.chat.api.pojo.QueryContext;
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
import com.tencent.supersonic.chat.api.pojo.QueryContext;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.chat.service.SemanticService;
import com.tencent.supersonic.common.util.ContextUtils;
import java.util.List;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.stream.Collectors;
/**
* A mapper capable of converting the VALUE of entity dimension values into ID types.
*/
@Slf4j
public class EntityMapper implements SchemaMapper {
public class EntityMapper extends BaseMapper {
@Override
public void map(QueryContext queryContext) {
public void work(QueryContext queryContext) {
SchemaMapInfo schemaMapInfo = queryContext.getMapInfo();
for (Long modelId : schemaMapInfo.getMatchedModels()) {
List<SchemaElementMatch> schemaElementMatchList = schemaMapInfo.getMatchedElements(modelId);
@@ -33,7 +34,7 @@ public class EntityMapper implements SchemaMapper {
continue;
}
List<SchemaElementMatch> valueSchemaElements = schemaElementMatchList.stream().filter(schemaElementMatch ->
SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType()))
SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType()))
.collect(Collectors.toList());
for (SchemaElementMatch schemaElementMatch : valueSchemaElements) {
if (!entity.getId().equals(schemaElementMatch.getElement().getId())) {
@@ -51,7 +52,7 @@ public class EntityMapper implements SchemaMapper {
}
private boolean checkExistSameEntitySchemaElements(SchemaElementMatch valueSchemaElementMatch,
List<SchemaElementMatch> schemaElementMatchList) {
List<SchemaElementMatch> schemaElementMatchList) {
List<SchemaElementMatch> entitySchemaElements = schemaElementMatchList.stream().filter(schemaElementMatch ->
SchemaElementType.ENTITY.equals(schemaElementMatch.getElement().getType()))
.collect(Collectors.toList());

View File

@@ -1,18 +1,16 @@
package com.tencent.supersonic.chat.mapper;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.chat.api.component.SchemaMapper;
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.chat.api.pojo.QueryContext;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.chat.config.OptimizationConfig;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.knowledge.service.SchemaService;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
@@ -25,13 +23,14 @@ import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;
/***
* A mapper capable of fuzzy parsing of metric names and dimension names.
*/
@Slf4j
public class FuzzyNameMapper implements SchemaMapper {
public class FuzzyNameMapper extends BaseMapper {
@Override
public void map(QueryContext queryContext) {
log.debug("before db mapper,mapInfo:{}", queryContext.getMapInfo());
public void work(QueryContext queryContext) {
List<Term> terms = HanlpHelper.getTerms(queryContext.getRequest().getQueryText());
@@ -41,7 +40,6 @@ public class FuzzyNameMapper implements SchemaMapper {
detectAndAddToSchema(queryContext, terms, semanticSchema.getMetrics(), SchemaElementType.METRIC);
log.debug("after db mapper,mapInfo:{}", queryContext.getMapInfo());
}
private void detectAndAddToSchema(QueryContext queryContext, List<Term> terms, List<SchemaElement> models,
@@ -65,7 +63,7 @@ public class FuzzyNameMapper implements SchemaMapper {
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
Set<Long> modelIds = mapperHelper.getModelIds(queryContext.getRequest());
Double metricDimensionThresholdConfig = getThreshold(queryContext, mapperHelper);
Double metricDimensionThresholdConfig = getThreshold(queryContext);
Map<String, Set<SchemaElement>> nameToItems = getNameToItems(models);
@@ -104,7 +102,7 @@ public class FuzzyNameMapper implements SchemaMapper {
return modelResultSet;
}
private Double getThreshold(QueryContext queryContext, MapperHelper mapperHelper) {
private Double getThreshold(QueryContext queryContext) {
OptimizationConfig optimizationConfig = ContextUtils.getBean(OptimizationConfig.class);
Double metricDimensionThresholdConfig = optimizationConfig.getMetricDimensionThresholdConfig();
@@ -150,16 +148,7 @@ public class FuzzyNameMapper implements SchemaMapper {
Set<SchemaElement> schemaElements = entry.getValue();
for (SchemaElement schemaElement : schemaElements) {
List<SchemaElementMatch> elements = schemaMap.getMatchedElements(schemaElement.getModel());
if (CollectionUtils.isEmpty(elements)) {
elements = new ArrayList<>();
schemaMap.setMatchedElements(schemaElement.getModel(), elements);
}
Set<Long> regElementSet = elements.stream()
.filter(elementMatch -> schemaElementType.equals(elementMatch.getElement().getType()))
.map(elementMatch -> elementMatch.getElement().getId())
.collect(Collectors.toSet());
Set<Long> regElementSet = getRegElementSet(schemaMap, schemaElementType, schemaElement);
if (regElementSet.contains(schemaElement.getId())) {
continue;
}
@@ -171,9 +160,22 @@ public class FuzzyNameMapper implements SchemaMapper {
.similarity(mapperHelper.getSimilarity(detectWord, schemaElement.getName()))
.build();
log.info("schemaElementType:{},add to schema, elementMatch {}", schemaElementType, schemaElementMatch);
elements.add(schemaElementMatch);
addToSchemaMap(schemaMap, schemaElement.getModel(), schemaElementMatch);
}
}
}
/**
 * Collects the ids of the elements of the given type that are already matched for the model
 * of {@code schemaElement}.
 *
 * @param schemaMap         the map info holding the matched elements per model
 * @param schemaElementType the element type to keep
 * @param schemaElement     the element whose model selects the matched-element list
 * @return the ids of the already matched elements of that type; an empty set when nothing is
 *         matched for the model
 */
private Set<Long> getRegElementSet(SchemaMapInfo schemaMap, SchemaElementType schemaElementType,
        SchemaElement schemaElement) {
    List<SchemaElementMatch> matchedElements = schemaMap.getMatchedElements(schemaElement.getModel());
    if (!CollectionUtils.isEmpty(matchedElements)) {
        return matchedElements.stream()
                .filter(match -> schemaElementType.equals(match.getElement().getType()))
                .map(match -> match.getElement().getId())
                .collect(Collectors.toSet());
    }
    // Nothing matched for this model yet.
    return new HashSet<>();
}
}

View File

@@ -1,7 +1,6 @@
package com.tencent.supersonic.chat.mapper;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.chat.api.component.SchemaMapper;
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
import com.tencent.supersonic.chat.api.pojo.QueryContext;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
@@ -25,18 +24,22 @@ import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
/***
* A mapper capable of prefix and suffix similarity parsing for
* domain names, dimension values, metric names, and dimension names.
*/
@Slf4j
public class HanlpDictMapper implements SchemaMapper {
public class HanlpDictMapper extends BaseMapper {
@Override
public void map(QueryContext queryContext) {
public void work(QueryContext queryContext) {
String queryText = queryContext.getRequest().getQueryText();
List<Term> terms = HanlpHelper.getTerms(queryText);
QueryMatchStrategy matchStrategy = ContextUtils.getBean(QueryMatchStrategy.class);
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
Set<Long> detectModelIds = mapperHelper.getModelIds(queryContext.getRequest());
Set<Long> detectModelIds = ContextUtils.getBean(MapperHelper.class).getModelIds(queryContext.getRequest());
terms = filterByModelIds(terms, detectModelIds);
@@ -47,8 +50,6 @@ public class HanlpDictMapper implements SchemaMapper {
HanlpHelper.transLetterOriginal(matches);
log.info("queryContext:{},matches:{}", queryContext, matches);
convertTermsToSchemaMapInfo(matches, queryContext.getMapInfo(), terms);
}
@@ -121,13 +122,7 @@ public class HanlpDictMapper implements SchemaMapper {
.detectWord(mapResult.getDetectWord())
.build();
Map<Long, List<SchemaElementMatch>> modelElementMatches = schemaMap.getModelElementMatches();
List<SchemaElementMatch> schemaElementMatches = modelElementMatches.putIfAbsent(modelId,
new ArrayList<>());
if (schemaElementMatches == null) {
schemaElementMatches = modelElementMatches.get(modelId);
}
schemaElementMatches.add(schemaElementMatch);
addToSchemaMap(schemaMap, modelId, schemaElementMatch);
}
}
}

View File

@@ -192,7 +192,7 @@ public class LLMRequestService {
String dataFormatType = fieldNameToDataFormatType.get(fieldName);
if (DataFormatTypeEnum.DECIMAL.getName().equalsIgnoreCase(dataFormatType)
|| DataFormatTypeEnum.PERCENT.getName().equalsIgnoreCase(dataFormatType)) {
String format = String.format("%s 的字段类型是 %s", fieldName, "小数; ");
String format = String.format("%s的计量单位是%s", fieldName, "小数; ");
extraInfoSb.append(format);
}
}